diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,90999 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2826268197268005, + "eval_steps": 250, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002565253639453601, + "grad_norm": 175.5614776611328, + "learning_rate": 5e-06, + "loss": 10.4885, + "num_input_tokens_seen": 125588, + "step": 1 + }, + { + "epoch": 0.0002565253639453601, + "loss": 10.224580764770508, + "loss_ce": 6.224580764770508, + "loss_iou": 1.15625, + "loss_num": 0.337890625, + "loss_xval": 4.0, + "num_input_tokens_seen": 125588, + "step": 1 + }, + { + "epoch": 0.0005130507278907202, + "grad_norm": 172.5997772216797, + "learning_rate": 5e-06, + "loss": 10.2395, + "num_input_tokens_seen": 251900, + "step": 2 + }, + { + "epoch": 0.0005130507278907202, + "loss": 10.229927062988281, + "loss_ce": 6.075629234313965, + "loss_iou": 1.203125, + "loss_num": 0.349609375, + "loss_xval": 4.15625, + "num_input_tokens_seen": 251900, + "step": 2 + }, + { + "epoch": 0.0007695760918360803, + "grad_norm": 175.2003936767578, + "learning_rate": 5e-06, + "loss": 10.4067, + "num_input_tokens_seen": 377912, + "step": 3 + }, + { + "epoch": 0.0007695760918360803, + "loss": 10.339284896850586, + "loss_ce": 6.198660373687744, + "loss_iou": 1.203125, + "loss_num": 0.34765625, + "loss_xval": 4.125, + "num_input_tokens_seen": 377912, + "step": 3 + }, + { + "epoch": 0.0010261014557814403, + "grad_norm": 173.7275848388672, + "learning_rate": 5e-06, + "loss": 10.1685, + "num_input_tokens_seen": 504228, + "step": 4 + }, + { + "epoch": 0.0010261014557814403, + "loss": 10.215723037719727, + "loss_ce": 6.137598514556885, + "loss_iou": 1.1875, + "loss_num": 0.33984375, + "loss_xval": 4.0625, + "num_input_tokens_seen": 504228, + "step": 4 + }, + { + "epoch": 0.0012826268197268006, + "grad_norm": 176.56124877929688, + "learning_rate": 5e-06, + "loss": 10.1392, + "num_input_tokens_seen": 630944, + "step": 5 + }, + { + "epoch": 0.0012826268197268006, + "eval_icons_CIoU": -0.17809736728668213, + "eval_icons_GIoU": -0.11384276673197746, + "eval_icons_IoU": 0.0024869858752936125, + "eval_icons_MAE_all": 0.3313166946172714, + "eval_icons_MAE_h": 0.2745002806186676, + "eval_icons_MAE_w": 0.3184145539999008, + "eval_icons_MAE_x_boxes": 0.22741428017616272, + "eval_icons_MAE_y_boxes": 0.244306780397892, + "eval_icons_NUM_probability": 4.184053250355646e-05, + "eval_icons_inside_bbox": 0.0, + "eval_icons_loss": 12.817168235778809, + "eval_icons_loss_ce": 8.93913221359253, + "eval_icons_loss_iou": 1.12841796875, + "eval_icons_loss_num": 0.33642578125, + "eval_icons_loss_xval": 3.943359375, + "eval_icons_runtime": 40.2915, + "eval_icons_samples_per_second": 1.241, + "eval_icons_steps_per_second": 0.05, + "num_input_tokens_seen": 630944, + "step": 5 + }, + { + "epoch": 0.0012826268197268006, + "eval_screenspot_CIoU": -0.20609864592552185, + "eval_screenspot_GIoU": -0.2205975204706192, + "eval_screenspot_IoU": 0.01432670404513677, + "eval_screenspot_MAE_all": 0.3268027603626251, + "eval_screenspot_MAE_h": 0.3270023465156555, + "eval_screenspot_MAE_w": 0.25847244759400684, + "eval_screenspot_MAE_x_boxes": 0.34576526284217834, + "eval_screenspot_MAE_y_boxes": 0.2279253453016281, + "eval_screenspot_NUM_probability": 2.5402678147656843e-05, + "eval_screenspot_inside_bbox": 0.020833333333333332, + "eval_screenspot_loss": 13.033950805664062, + "eval_screenspot_loss_ce": 8.956350644429525, + "eval_screenspot_loss_iou": 1.2249348958333333, + "eval_screenspot_loss_num": 0.3285319010416667, + "eval_screenspot_loss_xval": 4.09375, + "eval_screenspot_runtime": 74.2469, + "eval_screenspot_samples_per_second": 1.199, + "eval_screenspot_steps_per_second": 0.04, + "num_input_tokens_seen": 630944, + "step": 5 + }, + { + "epoch": 0.0012826268197268006, + "loss": 13.121145248413086, + "loss_ce": 8.898488998413086, + "loss_iou": 1.265625, + "loss_num": 0.33984375, + "loss_xval": 4.21875, + "num_input_tokens_seen": 630944, + "step": 5 + }, + { + "epoch": 0.0015391521836721606, + "grad_norm": 176.07150268554688, + "learning_rate": 5e-06, + "loss": 10.2281, + "num_input_tokens_seen": 755984, + "step": 6 + }, + { + "epoch": 0.0015391521836721606, + "loss": 10.349876403808594, + "loss_ce": 6.2346415519714355, + "loss_iou": 1.2109375, + "loss_num": 0.337890625, + "loss_xval": 4.125, + "num_input_tokens_seen": 755984, + "step": 6 + }, + { + "epoch": 0.0017956775476175206, + "grad_norm": 178.9892120361328, + "learning_rate": 5e-06, + "loss": 10.3456, + "num_input_tokens_seen": 880952, + "step": 7 + }, + { + "epoch": 0.0017956775476175206, + "loss": 10.55301284790039, + "loss_ce": 6.431919574737549, + "loss_iou": 1.2265625, + "loss_num": 0.33203125, + "loss_xval": 4.125, + "num_input_tokens_seen": 880952, + "step": 7 + }, + { + "epoch": 0.0020522029115628807, + "grad_norm": 175.2893829345703, + "learning_rate": 5e-06, + "loss": 10.2219, + "num_input_tokens_seen": 1005892, + "step": 8 + }, + { + "epoch": 0.0020522029115628807, + "loss": 9.759332656860352, + "loss_ce": 5.85698938369751, + "loss_iou": 1.1875, + "loss_num": 0.306640625, + "loss_xval": 3.90625, + "num_input_tokens_seen": 1005892, + "step": 8 + }, + { + "epoch": 0.0023087282755082407, + "grad_norm": 172.658203125, + "learning_rate": 5e-06, + "loss": 10.114, + "num_input_tokens_seen": 1131996, + "step": 9 + }, + { + "epoch": 0.0023087282755082407, + "loss": 10.199831008911133, + "loss_ce": 6.137331008911133, + "loss_iou": 1.2265625, + "loss_num": 0.322265625, + "loss_xval": 4.0625, + "num_input_tokens_seen": 1131996, + "step": 9 + }, + { + "epoch": 0.002565253639453601, + "grad_norm": 178.62509155273438, + "learning_rate": 5e-06, + "loss": 10.1674, + "num_input_tokens_seen": 1259348, + "step": 10 + }, + { + "epoch": 0.002565253639453601, + "loss": 10.38125991821289, + "loss_ce": 6.471104145050049, + "loss_iou": 1.234375, + "loss_num": 0.287109375, + "loss_xval": 3.90625, + "num_input_tokens_seen": 1259348, + "step": 10 + }, + { + "epoch": 0.002821779003398961, + "grad_norm": 179.45404052734375, + "learning_rate": 5e-06, + "loss": 10.005, + "num_input_tokens_seen": 1385172, + "step": 11 + }, + { + "epoch": 0.002821779003398961, + "loss": 10.153079986572266, + "loss_ce": 6.158939361572266, + "loss_iou": 1.2734375, + "loss_num": 0.287109375, + "loss_xval": 4.0, + "num_input_tokens_seen": 1385172, + "step": 11 + }, + { + "epoch": 0.0030783043673443212, + "grad_norm": 174.1383819580078, + "learning_rate": 5e-06, + "loss": 9.9514, + "num_input_tokens_seen": 1511108, + "step": 12 + }, + { + "epoch": 0.0030783043673443212, + "loss": 9.887187004089355, + "loss_ce": 5.9789838790893555, + "loss_iou": 1.2734375, + "loss_num": 0.2734375, + "loss_xval": 3.90625, + "num_input_tokens_seen": 1511108, + "step": 12 + }, + { + "epoch": 0.0033348297312896812, + "grad_norm": 176.49696350097656, + "learning_rate": 5e-06, + "loss": 9.7729, + "num_input_tokens_seen": 1638300, + "step": 13 + }, + { + "epoch": 0.0033348297312896812, + "loss": 9.945709228515625, + "loss_ce": 5.961334705352783, + "loss_iou": 1.3046875, + "loss_num": 0.2734375, + "loss_xval": 3.984375, + "num_input_tokens_seen": 1638300, + "step": 13 + }, + { + "epoch": 0.0035913550952350413, + "grad_norm": 177.7743377685547, + "learning_rate": 5e-06, + "loss": 9.7995, + "num_input_tokens_seen": 1764828, + "step": 14 + }, + { + "epoch": 0.0035913550952350413, + "loss": 9.700210571289062, + "loss_ce": 5.811538219451904, + "loss_iou": 1.296875, + "loss_num": 0.259765625, + "loss_xval": 3.890625, + "num_input_tokens_seen": 1764828, + "step": 14 + }, + { + "epoch": 0.0038478804591804013, + "grad_norm": 181.67364501953125, + "learning_rate": 5e-06, + "loss": 9.9675, + "num_input_tokens_seen": 1890356, + "step": 15 + }, + { + "epoch": 0.0038478804591804013, + "loss": 10.097586631774902, + "loss_ce": 6.230399131774902, + "loss_iou": 1.3125, + "loss_num": 0.2490234375, + "loss_xval": 3.875, + "num_input_tokens_seen": 1890356, + "step": 15 + }, + { + "epoch": 0.004104405823125761, + "grad_norm": 176.5181884765625, + "learning_rate": 5e-06, + "loss": 9.8289, + "num_input_tokens_seen": 2016784, + "step": 16 + }, + { + "epoch": 0.004104405823125761, + "loss": 9.674615859985352, + "loss_ce": 5.910944938659668, + "loss_iou": 1.2890625, + "loss_num": 0.236328125, + "loss_xval": 3.765625, + "num_input_tokens_seen": 2016784, + "step": 16 + }, + { + "epoch": 0.004360931187071122, + "grad_norm": 190.3811492919922, + "learning_rate": 5e-06, + "loss": 9.7344, + "num_input_tokens_seen": 2141264, + "step": 17 + }, + { + "epoch": 0.004360931187071122, + "loss": 9.899660110473633, + "loss_ce": 6.012940406799316, + "loss_iou": 1.3359375, + "loss_num": 0.2431640625, + "loss_xval": 3.890625, + "num_input_tokens_seen": 2141264, + "step": 17 + }, + { + "epoch": 0.004617456551016481, + "grad_norm": 183.88711547851562, + "learning_rate": 5e-06, + "loss": 9.3412, + "num_input_tokens_seen": 2268080, + "step": 18 + }, + { + "epoch": 0.004617456551016481, + "loss": 9.590524673461914, + "loss_ce": 5.6706037521362305, + "loss_iou": 1.3359375, + "loss_num": 0.25, + "loss_xval": 3.921875, + "num_input_tokens_seen": 2268080, + "step": 18 + }, + { + "epoch": 0.004873981914961842, + "grad_norm": 178.27174377441406, + "learning_rate": 5e-06, + "loss": 9.177, + "num_input_tokens_seen": 2393516, + "step": 19 + }, + { + "epoch": 0.004873981914961842, + "loss": 9.073789596557617, + "loss_ce": 5.386290073394775, + "loss_iou": 1.2734375, + "loss_num": 0.228515625, + "loss_xval": 3.6875, + "num_input_tokens_seen": 2393516, + "step": 19 + }, + { + "epoch": 0.005130507278907202, + "grad_norm": 183.88156127929688, + "learning_rate": 5e-06, + "loss": 9.1972, + "num_input_tokens_seen": 2519136, + "step": 20 + }, + { + "epoch": 0.005130507278907202, + "loss": 9.260429382324219, + "loss_ce": 5.572929382324219, + "loss_iou": 1.265625, + "loss_num": 0.23046875, + "loss_xval": 3.6875, + "num_input_tokens_seen": 2519136, + "step": 20 + }, + { + "epoch": 0.005387032642852562, + "grad_norm": 182.92445373535156, + "learning_rate": 5e-06, + "loss": 9.0451, + "num_input_tokens_seen": 2645144, + "step": 21 + }, + { + "epoch": 0.005387032642852562, + "loss": 8.98104476928711, + "loss_ce": 5.391201019287109, + "loss_iou": 1.28125, + "loss_num": 0.203125, + "loss_xval": 3.59375, + "num_input_tokens_seen": 2645144, + "step": 21 + }, + { + "epoch": 0.005643558006797922, + "grad_norm": 187.06190490722656, + "learning_rate": 5e-06, + "loss": 9.2214, + "num_input_tokens_seen": 2770552, + "step": 22 + }, + { + "epoch": 0.005643558006797922, + "loss": 9.242259979248047, + "loss_ce": 5.599681854248047, + "loss_iou": 1.3125, + "loss_num": 0.2041015625, + "loss_xval": 3.640625, + "num_input_tokens_seen": 2770552, + "step": 22 + }, + { + "epoch": 0.005900083370743282, + "grad_norm": 185.10504150390625, + "learning_rate": 5e-06, + "loss": 8.7691, + "num_input_tokens_seen": 2896912, + "step": 23 + }, + { + "epoch": 0.005900083370743282, + "loss": 8.755097389221191, + "loss_ce": 5.061737537384033, + "loss_iou": 1.296875, + "loss_num": 0.220703125, + "loss_xval": 3.6875, + "num_input_tokens_seen": 2896912, + "step": 23 + }, + { + "epoch": 0.0061566087346886424, + "grad_norm": 163.2713623046875, + "learning_rate": 5e-06, + "loss": 7.7787, + "num_input_tokens_seen": 3023224, + "step": 24 + }, + { + "epoch": 0.0061566087346886424, + "loss": 7.5264129638671875, + "loss_ce": 3.9795379638671875, + "loss_iou": 1.21875, + "loss_num": 0.2216796875, + "loss_xval": 3.546875, + "num_input_tokens_seen": 3023224, + "step": 24 + }, + { + "epoch": 0.006413134098634002, + "grad_norm": 157.09910583496094, + "learning_rate": 5e-06, + "loss": 7.6111, + "num_input_tokens_seen": 3150816, + "step": 25 + }, + { + "epoch": 0.006413134098634002, + "loss": 7.760533332824707, + "loss_ce": 4.010533332824707, + "loss_iou": 1.28125, + "loss_num": 0.23828125, + "loss_xval": 3.75, + "num_input_tokens_seen": 3150816, + "step": 25 + }, + { + "epoch": 0.0066696594625793625, + "grad_norm": 155.3214569091797, + "learning_rate": 5e-06, + "loss": 7.4579, + "num_input_tokens_seen": 3275972, + "step": 26 + }, + { + "epoch": 0.0066696594625793625, + "loss": 7.657684803009033, + "loss_ce": 3.776825189590454, + "loss_iou": 1.3046875, + "loss_num": 0.255859375, + "loss_xval": 3.875, + "num_input_tokens_seen": 3275972, + "step": 26 + }, + { + "epoch": 0.006926184826524723, + "grad_norm": 154.4881591796875, + "learning_rate": 5e-06, + "loss": 7.4609, + "num_input_tokens_seen": 3402664, + "step": 27 + }, + { + "epoch": 0.006926184826524723, + "loss": 7.6258745193481445, + "loss_ce": 3.8348588943481445, + "loss_iou": 1.2578125, + "loss_num": 0.25390625, + "loss_xval": 3.796875, + "num_input_tokens_seen": 3402664, + "step": 27 + }, + { + "epoch": 0.0071827101904700826, + "grad_norm": 153.49871826171875, + "learning_rate": 5e-06, + "loss": 7.3216, + "num_input_tokens_seen": 3529004, + "step": 28 + }, + { + "epoch": 0.0071827101904700826, + "loss": 7.517573356628418, + "loss_ce": 3.738276481628418, + "loss_iou": 1.2578125, + "loss_num": 0.251953125, + "loss_xval": 3.78125, + "num_input_tokens_seen": 3529004, + "step": 28 + }, + { + "epoch": 0.007439235554415443, + "grad_norm": 149.82147216796875, + "learning_rate": 5e-06, + "loss": 7.1982, + "num_input_tokens_seen": 3655820, + "step": 29 + }, + { + "epoch": 0.007439235554415443, + "loss": 7.290696144104004, + "loss_ce": 3.478196144104004, + "loss_iou": 1.28125, + "loss_num": 0.25, + "loss_xval": 3.8125, + "num_input_tokens_seen": 3655820, + "step": 29 + }, + { + "epoch": 0.007695760918360803, + "grad_norm": 135.2654571533203, + "learning_rate": 5e-06, + "loss": 6.9257, + "num_input_tokens_seen": 3781336, + "step": 30 + }, + { + "epoch": 0.007695760918360803, + "loss": 6.717167377471924, + "loss_ce": 3.150761127471924, + "loss_iou": 1.1953125, + "loss_num": 0.234375, + "loss_xval": 3.5625, + "num_input_tokens_seen": 3781336, + "step": 30 + }, + { + "epoch": 0.007952286282306162, + "grad_norm": 114.8228759765625, + "learning_rate": 5e-06, + "loss": 6.5416, + "num_input_tokens_seen": 3907432, + "step": 31 + }, + { + "epoch": 0.007952286282306162, + "loss": 6.536857604980469, + "loss_ce": 2.8142011165618896, + "loss_iou": 1.25, + "loss_num": 0.244140625, + "loss_xval": 3.71875, + "num_input_tokens_seen": 3907432, + "step": 31 + }, + { + "epoch": 0.008208811646251523, + "grad_norm": 75.32475280761719, + "learning_rate": 5e-06, + "loss": 6.1912, + "num_input_tokens_seen": 4033160, + "step": 32 + }, + { + "epoch": 0.008208811646251523, + "loss": 6.0081682205200195, + "loss_ce": 2.5081677436828613, + "loss_iou": 1.21875, + "loss_num": 0.2119140625, + "loss_xval": 3.5, + "num_input_tokens_seen": 4033160, + "step": 32 + }, + { + "epoch": 0.008465337010196883, + "grad_norm": 68.64422607421875, + "learning_rate": 5e-06, + "loss": 6.0066, + "num_input_tokens_seen": 4160296, + "step": 33 + }, + { + "epoch": 0.008465337010196883, + "loss": 6.147429466247559, + "loss_ce": 2.3564138412475586, + "loss_iou": 1.34375, + "loss_num": 0.2197265625, + "loss_xval": 3.796875, + "num_input_tokens_seen": 4160296, + "step": 33 + }, + { + "epoch": 0.008721862374142244, + "grad_norm": 80.0741958618164, + "learning_rate": 5e-06, + "loss": 5.9022, + "num_input_tokens_seen": 4286888, + "step": 34 + }, + { + "epoch": 0.008721862374142244, + "loss": 5.923714637756348, + "loss_ce": 2.1991052627563477, + "loss_iou": 1.2734375, + "loss_num": 0.236328125, + "loss_xval": 3.71875, + "num_input_tokens_seen": 4286888, + "step": 34 + }, + { + "epoch": 0.008978387738087604, + "grad_norm": 82.49143981933594, + "learning_rate": 5e-06, + "loss": 5.6834, + "num_input_tokens_seen": 4412944, + "step": 35 + }, + { + "epoch": 0.008978387738087604, + "loss": 5.693892478942871, + "loss_ce": 2.109907865524292, + "loss_iou": 1.2421875, + "loss_num": 0.21875, + "loss_xval": 3.578125, + "num_input_tokens_seen": 4412944, + "step": 35 + }, + { + "epoch": 0.009234913102032963, + "grad_norm": 85.99980163574219, + "learning_rate": 5e-06, + "loss": 5.5289, + "num_input_tokens_seen": 4538436, + "step": 36 + }, + { + "epoch": 0.009234913102032963, + "loss": 5.608206748962402, + "loss_ce": 1.869925856590271, + "loss_iou": 1.28125, + "loss_num": 0.234375, + "loss_xval": 3.734375, + "num_input_tokens_seen": 4538436, + "step": 36 + }, + { + "epoch": 0.009491438465978323, + "grad_norm": 90.54766845703125, + "learning_rate": 5e-06, + "loss": 5.3004, + "num_input_tokens_seen": 4665172, + "step": 37 + }, + { + "epoch": 0.009491438465978323, + "loss": 5.464558124542236, + "loss_ce": 1.6325267553329468, + "loss_iou": 1.375, + "loss_num": 0.2177734375, + "loss_xval": 3.828125, + "num_input_tokens_seen": 4665172, + "step": 37 + }, + { + "epoch": 0.009747963829923684, + "grad_norm": 79.52688598632812, + "learning_rate": 5e-06, + "loss": 5.2307, + "num_input_tokens_seen": 4790836, + "step": 38 + }, + { + "epoch": 0.009747963829923684, + "loss": 5.4111738204956055, + "loss_ce": 1.5303142070770264, + "loss_iou": 1.28125, + "loss_num": 0.263671875, + "loss_xval": 3.875, + "num_input_tokens_seen": 4790836, + "step": 38 + }, + { + "epoch": 0.010004489193869044, + "grad_norm": 70.49172973632812, + "learning_rate": 5e-06, + "loss": 4.9215, + "num_input_tokens_seen": 4916608, + "step": 39 + }, + { + "epoch": 0.010004489193869044, + "loss": 4.819802284240723, + "loss_ce": 1.378395915031433, + "loss_iou": 1.1875, + "loss_num": 0.2138671875, + "loss_xval": 3.4375, + "num_input_tokens_seen": 4916608, + "step": 39 + }, + { + "epoch": 0.010261014557814405, + "grad_norm": 57.78243637084961, + "learning_rate": 5e-06, + "loss": 4.6197, + "num_input_tokens_seen": 5042688, + "step": 40 + }, + { + "epoch": 0.010261014557814405, + "loss": 4.584734916687012, + "loss_ce": 1.0984067916870117, + "loss_iou": 1.21875, + "loss_num": 0.2109375, + "loss_xval": 3.484375, + "num_input_tokens_seen": 5042688, + "step": 40 + }, + { + "epoch": 0.010517539921759763, + "grad_norm": 50.31228256225586, + "learning_rate": 5e-06, + "loss": 4.6317, + "num_input_tokens_seen": 5169276, + "step": 41 + }, + { + "epoch": 0.010517539921759763, + "loss": 4.498469829559326, + "loss_ce": 0.9926106929779053, + "loss_iou": 1.15625, + "loss_num": 0.23828125, + "loss_xval": 3.5, + "num_input_tokens_seen": 5169276, + "step": 41 + }, + { + "epoch": 0.010774065285705124, + "grad_norm": 66.95683288574219, + "learning_rate": 5e-06, + "loss": 4.5143, + "num_input_tokens_seen": 5295820, + "step": 42 + }, + { + "epoch": 0.010774065285705124, + "loss": 4.447393894195557, + "loss_ce": 0.8477846384048462, + "loss_iou": 1.2421875, + "loss_num": 0.224609375, + "loss_xval": 3.59375, + "num_input_tokens_seen": 5295820, + "step": 42 + }, + { + "epoch": 0.011030590649650484, + "grad_norm": 82.00860595703125, + "learning_rate": 5e-06, + "loss": 4.3111, + "num_input_tokens_seen": 5422928, + "step": 43 + }, + { + "epoch": 0.011030590649650484, + "loss": 4.469751358032227, + "loss_ce": 0.7568607330322266, + "loss_iou": 1.21875, + "loss_num": 0.255859375, + "loss_xval": 3.71875, + "num_input_tokens_seen": 5422928, + "step": 43 + }, + { + "epoch": 0.011287116013595845, + "grad_norm": 92.98456573486328, + "learning_rate": 5e-06, + "loss": 4.2303, + "num_input_tokens_seen": 5549380, + "step": 44 + }, + { + "epoch": 0.011287116013595845, + "loss": 4.187341690063477, + "loss_ce": 0.7537478804588318, + "loss_iou": 1.1484375, + "loss_num": 0.228515625, + "loss_xval": 3.4375, + "num_input_tokens_seen": 5549380, + "step": 44 + }, + { + "epoch": 0.011543641377541205, + "grad_norm": 85.1664810180664, + "learning_rate": 5e-06, + "loss": 4.1498, + "num_input_tokens_seen": 5675588, + "step": 45 + }, + { + "epoch": 0.011543641377541205, + "loss": 4.171885013580322, + "loss_ce": 0.48633822798728943, + "loss_iou": 1.296875, + "loss_num": 0.21875, + "loss_xval": 3.6875, + "num_input_tokens_seen": 5675588, + "step": 45 + }, + { + "epoch": 0.011800166741486564, + "grad_norm": 73.48027801513672, + "learning_rate": 5e-06, + "loss": 4.0352, + "num_input_tokens_seen": 5802732, + "step": 46 + }, + { + "epoch": 0.011800166741486564, + "loss": 3.948075294494629, + "loss_ce": 0.350419282913208, + "loss_iou": 1.171875, + "loss_num": 0.25, + "loss_xval": 3.59375, + "num_input_tokens_seen": 5802732, + "step": 46 + }, + { + "epoch": 0.012056692105431924, + "grad_norm": 43.25382995605469, + "learning_rate": 5e-06, + "loss": 3.8534, + "num_input_tokens_seen": 5928984, + "step": 47 + }, + { + "epoch": 0.012056692105431924, + "loss": 3.9803085327148438, + "loss_ce": 0.2556988596916199, + "loss_iou": 1.3203125, + "loss_num": 0.216796875, + "loss_xval": 3.71875, + "num_input_tokens_seen": 5928984, + "step": 47 + }, + { + "epoch": 0.012313217469377285, + "grad_norm": 39.125770568847656, + "learning_rate": 5e-06, + "loss": 3.8844, + "num_input_tokens_seen": 6055532, + "step": 48 + }, + { + "epoch": 0.012313217469377285, + "loss": 3.7209632396698, + "loss_ce": 0.2502601742744446, + "loss_iou": 1.1953125, + "loss_num": 0.216796875, + "loss_xval": 3.46875, + "num_input_tokens_seen": 6055532, + "step": 48 + }, + { + "epoch": 0.012569742833322645, + "grad_norm": 46.63848876953125, + "learning_rate": 5e-06, + "loss": 3.5026, + "num_input_tokens_seen": 6182520, + "step": 49 + }, + { + "epoch": 0.012569742833322645, + "loss": 3.421649932861328, + "loss_ce": 0.13844692707061768, + "loss_iou": 1.1640625, + "loss_num": 0.189453125, + "loss_xval": 3.28125, + "num_input_tokens_seen": 6182520, + "step": 49 + }, + { + "epoch": 0.012826268197268004, + "grad_norm": 34.247737884521484, + "learning_rate": 5e-06, + "loss": 3.6153, + "num_input_tokens_seen": 6309184, + "step": 50 + }, + { + "epoch": 0.012826268197268004, + "loss": 3.5534186363220215, + "loss_ce": 0.13349667191505432, + "loss_iou": 1.203125, + "loss_num": 0.2041015625, + "loss_xval": 3.421875, + "num_input_tokens_seen": 6309184, + "step": 50 + }, + { + "epoch": 0.013082793561213365, + "grad_norm": 30.019733428955078, + "learning_rate": 5e-06, + "loss": 3.7023, + "num_input_tokens_seen": 6435304, + "step": 51 + }, + { + "epoch": 0.013082793561213365, + "loss": 3.6311092376708984, + "loss_ce": 0.1584530621767044, + "loss_iou": 1.171875, + "loss_num": 0.2255859375, + "loss_xval": 3.46875, + "num_input_tokens_seen": 6435304, + "step": 51 + }, + { + "epoch": 0.013339318925158725, + "grad_norm": 37.99278259277344, + "learning_rate": 5e-06, + "loss": 3.6091, + "num_input_tokens_seen": 6562596, + "step": 52 + }, + { + "epoch": 0.013339318925158725, + "loss": 3.465839385986328, + "loss_ce": 0.12208911776542664, + "loss_iou": 1.1875, + "loss_num": 0.193359375, + "loss_xval": 3.34375, + "num_input_tokens_seen": 6562596, + "step": 52 + }, + { + "epoch": 0.013595844289104085, + "grad_norm": 50.11624526977539, + "learning_rate": 5e-06, + "loss": 3.5389, + "num_input_tokens_seen": 6690252, + "step": 53 + }, + { + "epoch": 0.013595844289104085, + "loss": 3.5958218574523926, + "loss_ce": 0.11730626225471497, + "loss_iou": 1.171875, + "loss_num": 0.2275390625, + "loss_xval": 3.484375, + "num_input_tokens_seen": 6690252, + "step": 53 + }, + { + "epoch": 0.013852369653049446, + "grad_norm": 101.21369171142578, + "learning_rate": 5e-06, + "loss": 3.7451, + "num_input_tokens_seen": 6816904, + "step": 54 + }, + { + "epoch": 0.013852369653049446, + "loss": 3.817201852798462, + "loss_ce": 0.1355612725019455, + "loss_iou": 1.3515625, + "loss_num": 0.1953125, + "loss_xval": 3.6875, + "num_input_tokens_seen": 6816904, + "step": 54 + }, + { + "epoch": 0.014108895016994805, + "grad_norm": 84.71163177490234, + "learning_rate": 5e-06, + "loss": 3.7346, + "num_input_tokens_seen": 6942940, + "step": 55 + }, + { + "epoch": 0.014108895016994805, + "loss": 3.853278160095215, + "loss_ce": 0.12866894900798798, + "loss_iou": 1.140625, + "loss_num": 0.2890625, + "loss_xval": 3.71875, + "num_input_tokens_seen": 6942940, + "step": 55 + }, + { + "epoch": 0.014365420380940165, + "grad_norm": 32.02634811401367, + "learning_rate": 5e-06, + "loss": 3.4492, + "num_input_tokens_seen": 7069788, + "step": 56 + }, + { + "epoch": 0.014365420380940165, + "loss": 3.3887178897857666, + "loss_ce": 0.060592833906412125, + "loss_iou": 1.1328125, + "loss_num": 0.2138671875, + "loss_xval": 3.328125, + "num_input_tokens_seen": 7069788, + "step": 56 + }, + { + "epoch": 0.014621945744885526, + "grad_norm": 36.80150604248047, + "learning_rate": 5e-06, + "loss": 3.4229, + "num_input_tokens_seen": 7194992, + "step": 57 + }, + { + "epoch": 0.014621945744885526, + "loss": 3.3887665271759033, + "loss_ce": 0.0645478367805481, + "loss_iou": 1.1953125, + "loss_num": 0.1875, + "loss_xval": 3.328125, + "num_input_tokens_seen": 7194992, + "step": 57 + }, + { + "epoch": 0.014878471108830886, + "grad_norm": 25.860525131225586, + "learning_rate": 5e-06, + "loss": 3.4895, + "num_input_tokens_seen": 7321896, + "step": 58 + }, + { + "epoch": 0.014878471108830886, + "loss": 3.6210527420043945, + "loss_ce": 0.06441197544336319, + "loss_iou": 1.203125, + "loss_num": 0.2294921875, + "loss_xval": 3.5625, + "num_input_tokens_seen": 7321896, + "step": 58 + }, + { + "epoch": 0.015134996472776246, + "grad_norm": 20.3499698638916, + "learning_rate": 5e-06, + "loss": 3.3917, + "num_input_tokens_seen": 7448248, + "step": 59 + }, + { + "epoch": 0.015134996472776246, + "loss": 3.5145230293273926, + "loss_ce": 0.0692107081413269, + "loss_iou": 1.1796875, + "loss_num": 0.2177734375, + "loss_xval": 3.4375, + "num_input_tokens_seen": 7448248, + "step": 59 + }, + { + "epoch": 0.015391521836721605, + "grad_norm": 35.771610260009766, + "learning_rate": 5e-06, + "loss": 3.3899, + "num_input_tokens_seen": 7573964, + "step": 60 + }, + { + "epoch": 0.015391521836721605, + "loss": 3.352057456970215, + "loss_ce": 0.049323081970214844, + "loss_iou": 1.1796875, + "loss_num": 0.189453125, + "loss_xval": 3.296875, + "num_input_tokens_seen": 7573964, + "step": 60 + }, + { + "epoch": 0.015648047200666967, + "grad_norm": 32.17473602294922, + "learning_rate": 5e-06, + "loss": 3.3821, + "num_input_tokens_seen": 7699696, + "step": 61 + }, + { + "epoch": 0.015648047200666967, + "loss": 3.474994421005249, + "loss_ce": 0.0492132194340229, + "loss_iou": 1.21875, + "loss_num": 0.19921875, + "loss_xval": 3.421875, + "num_input_tokens_seen": 7699696, + "step": 61 + }, + { + "epoch": 0.015904572564612324, + "grad_norm": 25.308780670166016, + "learning_rate": 5e-06, + "loss": 3.3634, + "num_input_tokens_seen": 7826244, + "step": 62 + }, + { + "epoch": 0.015904572564612324, + "loss": 3.264090061187744, + "loss_ce": 0.05315283685922623, + "loss_iou": 1.15625, + "loss_num": 0.1787109375, + "loss_xval": 3.21875, + "num_input_tokens_seen": 7826244, + "step": 62 + }, + { + "epoch": 0.016161097928557685, + "grad_norm": 70.0099868774414, + "learning_rate": 5e-06, + "loss": 3.3452, + "num_input_tokens_seen": 7954136, + "step": 63 + }, + { + "epoch": 0.016161097928557685, + "loss": 3.261018753051758, + "loss_ce": 0.052034344524145126, + "loss_iou": 1.1953125, + "loss_num": 0.1640625, + "loss_xval": 3.203125, + "num_input_tokens_seen": 7954136, + "step": 63 + }, + { + "epoch": 0.016417623292503045, + "grad_norm": 87.06526184082031, + "learning_rate": 5e-06, + "loss": 3.5862, + "num_input_tokens_seen": 8079696, + "step": 64 + }, + { + "epoch": 0.016417623292503045, + "loss": 3.6002750396728516, + "loss_ce": 0.059259358793497086, + "loss_iou": 1.171875, + "loss_num": 0.23828125, + "loss_xval": 3.546875, + "num_input_tokens_seen": 8079696, + "step": 64 + }, + { + "epoch": 0.016674148656448406, + "grad_norm": 41.897010803222656, + "learning_rate": 5e-06, + "loss": 3.2868, + "num_input_tokens_seen": 8205432, + "step": 65 + }, + { + "epoch": 0.016674148656448406, + "loss": 3.282851219177246, + "loss_ce": 0.05628880858421326, + "loss_iou": 1.171875, + "loss_num": 0.1767578125, + "loss_xval": 3.21875, + "num_input_tokens_seen": 8205432, + "step": 65 + }, + { + "epoch": 0.016930674020393766, + "grad_norm": 24.853635787963867, + "learning_rate": 5e-06, + "loss": 3.3248, + "num_input_tokens_seen": 8331668, + "step": 66 + }, + { + "epoch": 0.016930674020393766, + "loss": 3.474534511566162, + "loss_ce": 0.035081423819065094, + "loss_iou": 1.2421875, + "loss_num": 0.19140625, + "loss_xval": 3.4375, + "num_input_tokens_seen": 8331668, + "step": 66 + }, + { + "epoch": 0.017187199384339127, + "grad_norm": 38.64767837524414, + "learning_rate": 5e-06, + "loss": 3.169, + "num_input_tokens_seen": 8457656, + "step": 67 + }, + { + "epoch": 0.017187199384339127, + "loss": 3.1116318702697754, + "loss_ce": 0.03350668400526047, + "loss_iou": 1.1171875, + "loss_num": 0.1689453125, + "loss_xval": 3.078125, + "num_input_tokens_seen": 8457656, + "step": 67 + }, + { + "epoch": 0.017443724748284487, + "grad_norm": 43.23768997192383, + "learning_rate": 5e-06, + "loss": 3.2564, + "num_input_tokens_seen": 8583868, + "step": 68 + }, + { + "epoch": 0.017443724748284487, + "loss": 3.2485389709472656, + "loss_ce": 0.04150766506791115, + "loss_iou": 1.1875, + "loss_num": 0.16796875, + "loss_xval": 3.203125, + "num_input_tokens_seen": 8583868, + "step": 68 + }, + { + "epoch": 0.017700250112229848, + "grad_norm": 59.72880554199219, + "learning_rate": 5e-06, + "loss": 3.2362, + "num_input_tokens_seen": 8711404, + "step": 69 + }, + { + "epoch": 0.017700250112229848, + "loss": 3.1349587440490723, + "loss_ce": 0.025583885610103607, + "loss_iou": 1.125, + "loss_num": 0.1728515625, + "loss_xval": 3.109375, + "num_input_tokens_seen": 8711404, + "step": 69 + }, + { + "epoch": 0.017956775476175208, + "grad_norm": 115.53094482421875, + "learning_rate": 5e-06, + "loss": 3.446, + "num_input_tokens_seen": 8838336, + "step": 70 + }, + { + "epoch": 0.017956775476175208, + "loss": 3.65211820602417, + "loss_ce": 0.027118433266878128, + "loss_iou": 1.4453125, + "loss_num": 0.1474609375, + "loss_xval": 3.625, + "num_input_tokens_seen": 8838336, + "step": 70 + }, + { + "epoch": 0.01821330084012057, + "grad_norm": 82.38813781738281, + "learning_rate": 5e-06, + "loss": 3.7034, + "num_input_tokens_seen": 8963896, + "step": 71 + }, + { + "epoch": 0.01821330084012057, + "loss": 3.750781536102295, + "loss_ce": 0.028125401586294174, + "loss_iou": 1.15625, + "loss_num": 0.28125, + "loss_xval": 3.71875, + "num_input_tokens_seen": 8963896, + "step": 71 + }, + { + "epoch": 0.018469826204065926, + "grad_norm": 39.2523078918457, + "learning_rate": 5e-06, + "loss": 3.3244, + "num_input_tokens_seen": 9089836, + "step": 72 + }, + { + "epoch": 0.018469826204065926, + "loss": 3.445977210998535, + "loss_ce": 0.04168044403195381, + "loss_iou": 1.125, + "loss_num": 0.2314453125, + "loss_xval": 3.40625, + "num_input_tokens_seen": 9089836, + "step": 72 + }, + { + "epoch": 0.018726351568011286, + "grad_norm": 47.28199768066406, + "learning_rate": 5e-06, + "loss": 3.2184, + "num_input_tokens_seen": 9215664, + "step": 73 + }, + { + "epoch": 0.018726351568011286, + "loss": 3.3213675022125244, + "loss_ce": 0.0264457818120718, + "loss_iou": 1.15625, + "loss_num": 0.1943359375, + "loss_xval": 3.296875, + "num_input_tokens_seen": 9215664, + "step": 73 + }, + { + "epoch": 0.018982876931956646, + "grad_norm": 20.887706756591797, + "learning_rate": 5e-06, + "loss": 3.1067, + "num_input_tokens_seen": 9342228, + "step": 74 + }, + { + "epoch": 0.018982876931956646, + "loss": 3.0648765563964844, + "loss_ce": 0.027767088264226913, + "loss_iou": 1.1015625, + "loss_num": 0.166015625, + "loss_xval": 3.03125, + "num_input_tokens_seen": 9342228, + "step": 74 + }, + { + "epoch": 0.019239402295902007, + "grad_norm": 44.38043975830078, + "learning_rate": 5e-06, + "loss": 3.1287, + "num_input_tokens_seen": 9467672, + "step": 75 + }, + { + "epoch": 0.019239402295902007, + "loss": 3.1120338439941406, + "loss_ce": 0.020236866548657417, + "loss_iou": 1.1796875, + "loss_num": 0.1455078125, + "loss_xval": 3.09375, + "num_input_tokens_seen": 9467672, + "step": 75 + }, + { + "epoch": 0.019495927659847367, + "grad_norm": 39.1973991394043, + "learning_rate": 5e-06, + "loss": 3.0963, + "num_input_tokens_seen": 9594548, + "step": 76 + }, + { + "epoch": 0.019495927659847367, + "loss": 3.243161201477051, + "loss_ce": 0.03808319941163063, + "loss_iou": 1.125, + "loss_num": 0.19140625, + "loss_xval": 3.203125, + "num_input_tokens_seen": 9594548, + "step": 76 + }, + { + "epoch": 0.019752453023792728, + "grad_norm": 43.993995666503906, + "learning_rate": 5e-06, + "loss": 3.0795, + "num_input_tokens_seen": 9721120, + "step": 77 + }, + { + "epoch": 0.019752453023792728, + "loss": 3.0367302894592285, + "loss_ce": 0.026964720338582993, + "loss_iou": 1.1328125, + "loss_num": 0.1484375, + "loss_xval": 3.015625, + "num_input_tokens_seen": 9721120, + "step": 77 + }, + { + "epoch": 0.02000897838773809, + "grad_norm": 38.7779541015625, + "learning_rate": 5e-06, + "loss": 3.0596, + "num_input_tokens_seen": 9847768, + "step": 78 + }, + { + "epoch": 0.02000897838773809, + "loss": 3.1277730464935303, + "loss_ce": 0.022304125130176544, + "loss_iou": 1.1328125, + "loss_num": 0.16796875, + "loss_xval": 3.109375, + "num_input_tokens_seen": 9847768, + "step": 78 + }, + { + "epoch": 0.02026550375168345, + "grad_norm": 39.896732330322266, + "learning_rate": 5e-06, + "loss": 3.0943, + "num_input_tokens_seen": 9974152, + "step": 79 + }, + { + "epoch": 0.02026550375168345, + "loss": 3.2204573154449463, + "loss_ce": 0.027098044753074646, + "loss_iou": 1.1953125, + "loss_num": 0.162109375, + "loss_xval": 3.1875, + "num_input_tokens_seen": 9974152, + "step": 79 + }, + { + "epoch": 0.02052202911562881, + "grad_norm": 31.274333953857422, + "learning_rate": 5e-06, + "loss": 3.0824, + "num_input_tokens_seen": 10099648, + "step": 80 + }, + { + "epoch": 0.02052202911562881, + "loss": 2.9422428607940674, + "loss_ce": 0.02622729167342186, + "loss_iou": 1.0625, + "loss_num": 0.158203125, + "loss_xval": 2.921875, + "num_input_tokens_seen": 10099648, + "step": 80 + }, + { + "epoch": 0.020778554479574166, + "grad_norm": 26.87254524230957, + "learning_rate": 5e-06, + "loss": 2.9988, + "num_input_tokens_seen": 10225612, + "step": 81 + }, + { + "epoch": 0.020778554479574166, + "loss": 2.88398814201355, + "loss_ce": 0.020706914365291595, + "loss_iou": 1.0546875, + "loss_num": 0.150390625, + "loss_xval": 2.859375, + "num_input_tokens_seen": 10225612, + "step": 81 + }, + { + "epoch": 0.021035079843519527, + "grad_norm": 64.69682312011719, + "learning_rate": 5e-06, + "loss": 2.9294, + "num_input_tokens_seen": 10352960, + "step": 82 + }, + { + "epoch": 0.021035079843519527, + "loss": 2.920563220977783, + "loss_ce": 0.02603220008313656, + "loss_iou": 1.125, + "loss_num": 0.12890625, + "loss_xval": 2.890625, + "num_input_tokens_seen": 10352960, + "step": 82 + }, + { + "epoch": 0.021291605207464887, + "grad_norm": 40.93315124511719, + "learning_rate": 5e-06, + "loss": 3.1489, + "num_input_tokens_seen": 10479884, + "step": 83 + }, + { + "epoch": 0.021291605207464887, + "loss": 3.2524781227111816, + "loss_ce": 0.018103033304214478, + "loss_iou": 1.1484375, + "loss_num": 0.1884765625, + "loss_xval": 3.234375, + "num_input_tokens_seen": 10479884, + "step": 83 + }, + { + "epoch": 0.021548130571410248, + "grad_norm": 47.865203857421875, + "learning_rate": 5e-06, + "loss": 3.1314, + "num_input_tokens_seen": 10607176, + "step": 84 + }, + { + "epoch": 0.021548130571410248, + "loss": 3.0634937286376953, + "loss_ce": 0.012712271884083748, + "loss_iou": 1.140625, + "loss_num": 0.1533203125, + "loss_xval": 3.046875, + "num_input_tokens_seen": 10607176, + "step": 84 + }, + { + "epoch": 0.021804655935355608, + "grad_norm": 44.862850189208984, + "learning_rate": 5e-06, + "loss": 2.9932, + "num_input_tokens_seen": 10734508, + "step": 85 + }, + { + "epoch": 0.021804655935355608, + "loss": 3.0022835731506348, + "loss_ce": 0.01986151747405529, + "loss_iou": 1.0703125, + "loss_num": 0.1689453125, + "loss_xval": 2.984375, + "num_input_tokens_seen": 10734508, + "step": 85 + }, + { + "epoch": 0.02206118129930097, + "grad_norm": 95.27526092529297, + "learning_rate": 5e-06, + "loss": 3.0355, + "num_input_tokens_seen": 10861348, + "step": 86 + }, + { + "epoch": 0.02206118129930097, + "loss": 3.0225088596343994, + "loss_ce": 0.01274324581027031, + "loss_iou": 1.1875, + "loss_num": 0.1259765625, + "loss_xval": 3.015625, + "num_input_tokens_seen": 10861348, + "step": 86 + }, + { + "epoch": 0.02231770666324633, + "grad_norm": 78.31133270263672, + "learning_rate": 5e-06, + "loss": 3.3175, + "num_input_tokens_seen": 10987384, + "step": 87 + }, + { + "epoch": 0.02231770666324633, + "loss": 3.3293075561523438, + "loss_ce": 0.016807712614536285, + "loss_iou": 1.140625, + "loss_num": 0.2060546875, + "loss_xval": 3.3125, + "num_input_tokens_seen": 10987384, + "step": 87 + }, + { + "epoch": 0.02257423202719169, + "grad_norm": 46.081546783447266, + "learning_rate": 5e-06, + "loss": 3.0145, + "num_input_tokens_seen": 11113360, + "step": 88 + }, + { + "epoch": 0.02257423202719169, + "loss": 3.032632827758789, + "loss_ce": 0.015054561197757721, + "loss_iou": 1.1328125, + "loss_num": 0.150390625, + "loss_xval": 3.015625, + "num_input_tokens_seen": 11113360, + "step": 88 + }, + { + "epoch": 0.02283075739113705, + "grad_norm": 60.697174072265625, + "learning_rate": 5e-06, + "loss": 2.9269, + "num_input_tokens_seen": 11240308, + "step": 89 + }, + { + "epoch": 0.02283075739113705, + "loss": 2.7379448413848877, + "loss_ce": 0.019194845110177994, + "loss_iou": 1.0703125, + "loss_num": 0.115234375, + "loss_xval": 2.71875, + "num_input_tokens_seen": 11240308, + "step": 89 + }, + { + "epoch": 0.02308728275508241, + "grad_norm": 36.36449432373047, + "learning_rate": 5e-06, + "loss": 3.0375, + "num_input_tokens_seen": 11366208, + "step": 90 + }, + { + "epoch": 0.02308728275508241, + "loss": 2.968803882598877, + "loss_ce": 0.013725914061069489, + "loss_iou": 1.0703125, + "loss_num": 0.1640625, + "loss_xval": 2.953125, + "num_input_tokens_seen": 11366208, + "step": 90 + }, + { + "epoch": 0.023343808119027767, + "grad_norm": 24.182878494262695, + "learning_rate": 5e-06, + "loss": 2.8852, + "num_input_tokens_seen": 11493124, + "step": 91 + }, + { + "epoch": 0.023343808119027767, + "loss": 2.950312376022339, + "loss_ce": 0.014765567146241665, + "loss_iou": 1.0703125, + "loss_num": 0.1591796875, + "loss_xval": 2.9375, + "num_input_tokens_seen": 11493124, + "step": 91 + }, + { + "epoch": 0.023600333482973128, + "grad_norm": 36.32478713989258, + "learning_rate": 5e-06, + "loss": 2.8243, + "num_input_tokens_seen": 11618440, + "step": 92 + }, + { + "epoch": 0.023600333482973128, + "loss": 2.777660846710205, + "loss_ce": 0.012035926803946495, + "loss_iou": 1.0859375, + "loss_num": 0.11962890625, + "loss_xval": 2.765625, + "num_input_tokens_seen": 11618440, + "step": 92 + }, + { + "epoch": 0.02385685884691849, + "grad_norm": 30.922170639038086, + "learning_rate": 5e-06, + "loss": 2.8283, + "num_input_tokens_seen": 11746096, + "step": 93 + }, + { + "epoch": 0.02385685884691849, + "loss": 2.915024757385254, + "loss_ce": 0.022446738556027412, + "loss_iou": 1.0625, + "loss_num": 0.1533203125, + "loss_xval": 2.890625, + "num_input_tokens_seen": 11746096, + "step": 93 + }, + { + "epoch": 0.02411338421086385, + "grad_norm": 27.584747314453125, + "learning_rate": 5e-06, + "loss": 2.7553, + "num_input_tokens_seen": 11873716, + "step": 94 + }, + { + "epoch": 0.02411338421086385, + "loss": 2.796095848083496, + "loss_ce": 0.016799159348011017, + "loss_iou": 1.09375, + "loss_num": 0.1171875, + "loss_xval": 2.78125, + "num_input_tokens_seen": 11873716, + "step": 94 + }, + { + "epoch": 0.02436990957480921, + "grad_norm": 37.147037506103516, + "learning_rate": 5e-06, + "loss": 2.792, + "num_input_tokens_seen": 12000444, + "step": 95 + }, + { + "epoch": 0.02436990957480921, + "loss": 2.917742967605591, + "loss_ce": 0.013446154072880745, + "loss_iou": 1.09375, + "loss_num": 0.1416015625, + "loss_xval": 2.90625, + "num_input_tokens_seen": 12000444, + "step": 95 + }, + { + "epoch": 0.02462643493875457, + "grad_norm": 85.68004608154297, + "learning_rate": 5e-06, + "loss": 2.9387, + "num_input_tokens_seen": 12127516, + "step": 96 + }, + { + "epoch": 0.02462643493875457, + "loss": 2.8435819149017334, + "loss_ce": 0.01741006039083004, + "loss_iou": 1.1484375, + "loss_num": 0.10498046875, + "loss_xval": 2.828125, + "num_input_tokens_seen": 12127516, + "step": 96 + }, + { + "epoch": 0.02488296030269993, + "grad_norm": 67.87000274658203, + "learning_rate": 5e-06, + "loss": 3.0883, + "num_input_tokens_seen": 12254856, + "step": 97 + }, + { + "epoch": 0.02488296030269993, + "loss": 3.1289682388305664, + "loss_ce": 0.013733651489019394, + "loss_iou": 1.125, + "loss_num": 0.1748046875, + "loss_xval": 3.109375, + "num_input_tokens_seen": 12254856, + "step": 97 + }, + { + "epoch": 0.02513948566664529, + "grad_norm": 28.72338104248047, + "learning_rate": 5e-06, + "loss": 2.8677, + "num_input_tokens_seen": 12380996, + "step": 98 + }, + { + "epoch": 0.02513948566664529, + "loss": 2.9589712619781494, + "loss_ce": 0.015611954964697361, + "loss_iou": 1.09375, + "loss_num": 0.150390625, + "loss_xval": 2.9375, + "num_input_tokens_seen": 12380996, + "step": 98 + }, + { + "epoch": 0.02539601103059065, + "grad_norm": 34.20926284790039, + "learning_rate": 5e-06, + "loss": 2.7764, + "num_input_tokens_seen": 12506700, + "step": 99 + }, + { + "epoch": 0.02539601103059065, + "loss": 2.894710063934326, + "loss_ce": 0.013850577175617218, + "loss_iou": 1.109375, + "loss_num": 0.134765625, + "loss_xval": 2.875, + "num_input_tokens_seen": 12506700, + "step": 99 + }, + { + "epoch": 0.025652536394536008, + "grad_norm": 30.65720558166504, + "learning_rate": 5e-06, + "loss": 2.8432, + "num_input_tokens_seen": 12631416, + "step": 100 + }, + { + "epoch": 0.025652536394536008, + "loss": 2.8236584663391113, + "loss_ce": 0.01311160996556282, + "loss_iou": 1.1015625, + "loss_num": 0.1220703125, + "loss_xval": 2.8125, + "num_input_tokens_seen": 12631416, + "step": 100 + }, + { + "epoch": 0.02590906175848137, + "grad_norm": 37.46623992919922, + "learning_rate": 5e-06, + "loss": 2.8338, + "num_input_tokens_seen": 12758496, + "step": 101 + }, + { + "epoch": 0.02590906175848137, + "loss": 2.713740348815918, + "loss_ce": 0.016474712640047073, + "loss_iou": 1.0546875, + "loss_num": 0.1181640625, + "loss_xval": 2.703125, + "num_input_tokens_seen": 12758496, + "step": 101 + }, + { + "epoch": 0.02616558712242673, + "grad_norm": 71.9797134399414, + "learning_rate": 5e-06, + "loss": 2.9087, + "num_input_tokens_seen": 12884212, + "step": 102 + }, + { + "epoch": 0.02616558712242673, + "loss": 2.7867157459259033, + "loss_ce": 0.021090732887387276, + "loss_iou": 1.109375, + "loss_num": 0.107421875, + "loss_xval": 2.765625, + "num_input_tokens_seen": 12884212, + "step": 102 + }, + { + "epoch": 0.02642211248637209, + "grad_norm": 56.98023986816406, + "learning_rate": 5e-06, + "loss": 2.9836, + "num_input_tokens_seen": 13010340, + "step": 103 + }, + { + "epoch": 0.02642211248637209, + "loss": 2.994760513305664, + "loss_ce": 0.010385450907051563, + "loss_iou": 1.078125, + "loss_num": 0.1650390625, + "loss_xval": 2.984375, + "num_input_tokens_seen": 13010340, + "step": 103 + }, + { + "epoch": 0.02667863785031745, + "grad_norm": 25.406171798706055, + "learning_rate": 5e-06, + "loss": 2.8256, + "num_input_tokens_seen": 13136248, + "step": 104 + }, + { + "epoch": 0.02667863785031745, + "loss": 2.6833677291870117, + "loss_ce": 0.017352260649204254, + "loss_iou": 1.0234375, + "loss_num": 0.123046875, + "loss_xval": 2.671875, + "num_input_tokens_seen": 13136248, + "step": 104 + }, + { + "epoch": 0.02693516321426281, + "grad_norm": 32.290584564208984, + "learning_rate": 5e-06, + "loss": 2.7357, + "num_input_tokens_seen": 13262340, + "step": 105 + }, + { + "epoch": 0.02693516321426281, + "loss": 2.6801185607910156, + "loss_ce": 0.014102968387305737, + "loss_iou": 1.046875, + "loss_num": 0.115234375, + "loss_xval": 2.671875, + "num_input_tokens_seen": 13262340, + "step": 105 + }, + { + "epoch": 0.02719168857820817, + "grad_norm": 31.57007598876953, + "learning_rate": 5e-06, + "loss": 2.6092, + "num_input_tokens_seen": 13389420, + "step": 106 + }, + { + "epoch": 0.02719168857820817, + "loss": 2.665848731994629, + "loss_ce": 0.027176853269338608, + "loss_iou": 1.0625, + "loss_num": 0.1015625, + "loss_xval": 2.640625, + "num_input_tokens_seen": 13389420, + "step": 106 + }, + { + "epoch": 0.02744821394215353, + "grad_norm": 22.632143020629883, + "learning_rate": 5e-06, + "loss": 2.6059, + "num_input_tokens_seen": 13515164, + "step": 107 + }, + { + "epoch": 0.02744821394215353, + "loss": 2.6642847061157227, + "loss_ce": 0.008034702390432358, + "loss_iou": 1.0703125, + "loss_num": 0.10302734375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 13515164, + "step": 107 + }, + { + "epoch": 0.027704739306098892, + "grad_norm": 39.209228515625, + "learning_rate": 5e-06, + "loss": 2.6815, + "num_input_tokens_seen": 13641412, + "step": 108 + }, + { + "epoch": 0.027704739306098892, + "loss": 2.660715103149414, + "loss_ce": 0.012277309782803059, + "loss_iou": 1.0859375, + "loss_num": 0.09423828125, + "loss_xval": 2.65625, + "num_input_tokens_seen": 13641412, + "step": 108 + }, + { + "epoch": 0.027961264670044252, + "grad_norm": 53.89849090576172, + "learning_rate": 5e-06, + "loss": 2.6715, + "num_input_tokens_seen": 13766652, + "step": 109 + }, + { + "epoch": 0.027961264670044252, + "loss": 2.664438247680664, + "loss_ce": 0.01014143880456686, + "loss_iou": 1.09375, + "loss_num": 0.09375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 13766652, + "step": 109 + }, + { + "epoch": 0.02821779003398961, + "grad_norm": 54.301300048828125, + "learning_rate": 5e-06, + "loss": 2.6584, + "num_input_tokens_seen": 13894408, + "step": 110 + }, + { + "epoch": 0.02821779003398961, + "loss": 2.6851541996002197, + "loss_ce": 0.009373034350574017, + "loss_iou": 1.0703125, + "loss_num": 0.1064453125, + "loss_xval": 2.671875, + "num_input_tokens_seen": 13894408, + "step": 110 + }, + { + "epoch": 0.02847431539793497, + "grad_norm": 36.57997512817383, + "learning_rate": 5e-06, + "loss": 2.6983, + "num_input_tokens_seen": 14020456, + "step": 111 + }, + { + "epoch": 0.02847431539793497, + "loss": 2.5988218784332275, + "loss_ce": 0.012884370982646942, + "loss_iou": 1.03125, + "loss_num": 0.10498046875, + "loss_xval": 2.59375, + "num_input_tokens_seen": 14020456, + "step": 111 + }, + { + "epoch": 0.02873084076188033, + "grad_norm": 25.318971633911133, + "learning_rate": 5e-06, + "loss": 2.648, + "num_input_tokens_seen": 14147268, + "step": 112 + }, + { + "epoch": 0.02873084076188033, + "loss": 2.795989990234375, + "loss_ce": 0.014740070328116417, + "loss_iou": 1.1015625, + "loss_num": 0.1162109375, + "loss_xval": 2.78125, + "num_input_tokens_seen": 14147268, + "step": 112 + }, + { + "epoch": 0.02898736612582569, + "grad_norm": 53.55531692504883, + "learning_rate": 5e-06, + "loss": 2.6171, + "num_input_tokens_seen": 14273352, + "step": 113 + }, + { + "epoch": 0.02898736612582569, + "loss": 2.6019198894500732, + "loss_ce": 0.01207612082362175, + "loss_iou": 1.0625, + "loss_num": 0.09375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 14273352, + "step": 113 + }, + { + "epoch": 0.02924389148977105, + "grad_norm": 49.240386962890625, + "learning_rate": 5e-06, + "loss": 2.7605, + "num_input_tokens_seen": 14399308, + "step": 114 + }, + { + "epoch": 0.02924389148977105, + "loss": 2.9250965118408203, + "loss_ce": 0.007127617485821247, + "loss_iou": 1.1171875, + "loss_num": 0.1357421875, + "loss_xval": 2.921875, + "num_input_tokens_seen": 14399308, + "step": 114 + }, + { + "epoch": 0.02950041685371641, + "grad_norm": 84.77357482910156, + "learning_rate": 5e-06, + "loss": 2.7282, + "num_input_tokens_seen": 14525000, + "step": 115 + }, + { + "epoch": 0.02950041685371641, + "loss": 2.7143073081970215, + "loss_ce": 0.007276169024407864, + "loss_iou": 1.125, + "loss_num": 0.08984375, + "loss_xval": 2.703125, + "num_input_tokens_seen": 14525000, + "step": 115 + }, + { + "epoch": 0.029756942217661772, + "grad_norm": 52.75733947753906, + "learning_rate": 5e-06, + "loss": 2.7703, + "num_input_tokens_seen": 14651664, + "step": 116 + }, + { + "epoch": 0.029756942217661772, + "loss": 2.7669665813446045, + "loss_ce": 0.00915401242673397, + "loss_iou": 1.078125, + "loss_num": 0.12158203125, + "loss_xval": 2.75, + "num_input_tokens_seen": 14651664, + "step": 116 + }, + { + "epoch": 0.030013467581607133, + "grad_norm": 73.32365417480469, + "learning_rate": 5e-06, + "loss": 2.7026, + "num_input_tokens_seen": 14778136, + "step": 117 + }, + { + "epoch": 0.030013467581607133, + "loss": 2.737086057662964, + "loss_ce": 0.016382912173867226, + "loss_iou": 1.1171875, + "loss_num": 0.095703125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 14778136, + "step": 117 + }, + { + "epoch": 0.030269992945552493, + "grad_norm": 54.40532302856445, + "learning_rate": 5e-06, + "loss": 2.8099, + "num_input_tokens_seen": 14904632, + "step": 118 + }, + { + "epoch": 0.030269992945552493, + "loss": 2.8112223148345947, + "loss_ce": 0.006534915417432785, + "loss_iou": 1.0625, + "loss_num": 0.1376953125, + "loss_xval": 2.8125, + "num_input_tokens_seen": 14904632, + "step": 118 + }, + { + "epoch": 0.03052651830949785, + "grad_norm": 26.202964782714844, + "learning_rate": 5e-06, + "loss": 2.7059, + "num_input_tokens_seen": 15030976, + "step": 119 + }, + { + "epoch": 0.03052651830949785, + "loss": 2.6679232120513916, + "loss_ce": 0.007766999304294586, + "loss_iou": 1.046875, + "loss_num": 0.11328125, + "loss_xval": 2.65625, + "num_input_tokens_seen": 15030976, + "step": 119 + }, + { + "epoch": 0.03078304367344321, + "grad_norm": 19.56947898864746, + "learning_rate": 5e-06, + "loss": 2.5016, + "num_input_tokens_seen": 15157084, + "step": 120 + }, + { + "epoch": 0.03078304367344321, + "loss": 2.6053390502929688, + "loss_ce": 0.005729649215936661, + "loss_iou": 1.0390625, + "loss_num": 0.10546875, + "loss_xval": 2.59375, + "num_input_tokens_seen": 15157084, + "step": 120 + }, + { + "epoch": 0.03103956903738857, + "grad_norm": 122.9305419921875, + "learning_rate": 5e-06, + "loss": 2.5494, + "num_input_tokens_seen": 15283388, + "step": 121 + }, + { + "epoch": 0.03103956903738857, + "loss": 2.707568645477295, + "loss_ce": 0.014209110289812088, + "loss_iou": 1.09375, + "loss_num": 0.1015625, + "loss_xval": 2.6875, + "num_input_tokens_seen": 15283388, + "step": 121 + }, + { + "epoch": 0.031296094401333935, + "grad_norm": 47.97330093383789, + "learning_rate": 5e-06, + "loss": 2.5619, + "num_input_tokens_seen": 15409908, + "step": 122 + }, + { + "epoch": 0.031296094401333935, + "loss": 2.526622772216797, + "loss_ce": 0.007091344799846411, + "loss_iou": 1.0703125, + "loss_num": 0.07666015625, + "loss_xval": 2.515625, + "num_input_tokens_seen": 15409908, + "step": 122 + }, + { + "epoch": 0.031552619765279295, + "grad_norm": 60.44999694824219, + "learning_rate": 5e-06, + "loss": 2.7455, + "num_input_tokens_seen": 15537004, + "step": 123 + }, + { + "epoch": 0.031552619765279295, + "loss": 2.6782548427581787, + "loss_ce": 0.012239217758178711, + "loss_iou": 1.0625, + "loss_num": 0.10693359375, + "loss_xval": 2.671875, + "num_input_tokens_seen": 15537004, + "step": 123 + }, + { + "epoch": 0.03180914512922465, + "grad_norm": 71.20117950439453, + "learning_rate": 5e-06, + "loss": 2.6299, + "num_input_tokens_seen": 15663468, + "step": 124 + }, + { + "epoch": 0.03180914512922465, + "loss": 2.6547741889953613, + "loss_ce": 0.004383578430861235, + "loss_iou": 1.1015625, + "loss_num": 0.08984375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 15663468, + "step": 124 + }, + { + "epoch": 0.03206567049317001, + "grad_norm": 53.503841400146484, + "learning_rate": 5e-06, + "loss": 2.744, + "num_input_tokens_seen": 15789200, + "step": 125 + }, + { + "epoch": 0.03206567049317001, + "loss": 2.7980947494506836, + "loss_ce": 0.007079221308231354, + "loss_iou": 1.1015625, + "loss_num": 0.11865234375, + "loss_xval": 2.796875, + "num_input_tokens_seen": 15789200, + "step": 125 + }, + { + "epoch": 0.03232219585711537, + "grad_norm": 28.437164306640625, + "learning_rate": 5e-06, + "loss": 2.6505, + "num_input_tokens_seen": 15914772, + "step": 126 + }, + { + "epoch": 0.03232219585711537, + "loss": 2.6763556003570557, + "loss_ce": 0.004480727482587099, + "loss_iou": 1.0703125, + "loss_num": 0.1064453125, + "loss_xval": 2.671875, + "num_input_tokens_seen": 15914772, + "step": 126 + }, + { + "epoch": 0.03257872122106073, + "grad_norm": 27.92886734008789, + "learning_rate": 5e-06, + "loss": 2.4758, + "num_input_tokens_seen": 16042028, + "step": 127 + }, + { + "epoch": 0.03257872122106073, + "loss": 2.4179646968841553, + "loss_ce": 0.009761575609445572, + "loss_iou": 0.984375, + "loss_num": 0.087890625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 16042028, + "step": 127 + }, + { + "epoch": 0.03283524658500609, + "grad_norm": 52.326141357421875, + "learning_rate": 5e-06, + "loss": 2.6263, + "num_input_tokens_seen": 16168556, + "step": 128 + }, + { + "epoch": 0.03283524658500609, + "loss": 2.733271598815918, + "loss_ce": 0.0067090727388858795, + "loss_iou": 1.15625, + "loss_num": 0.083984375, + "loss_xval": 2.71875, + "num_input_tokens_seen": 16168556, + "step": 128 + }, + { + "epoch": 0.03309177194895145, + "grad_norm": 44.34842300415039, + "learning_rate": 5e-06, + "loss": 2.6195, + "num_input_tokens_seen": 16294848, + "step": 129 + }, + { + "epoch": 0.03309177194895145, + "loss": 2.589071750640869, + "loss_ce": 0.01485302671790123, + "loss_iou": 1.0390625, + "loss_num": 0.10009765625, + "loss_xval": 2.578125, + "num_input_tokens_seen": 16294848, + "step": 129 + }, + { + "epoch": 0.03334829731289681, + "grad_norm": 37.2363166809082, + "learning_rate": 5e-06, + "loss": 2.4356, + "num_input_tokens_seen": 16422168, + "step": 130 + }, + { + "epoch": 0.03334829731289681, + "loss": 2.5008912086486816, + "loss_ce": 0.0067505743354558945, + "loss_iou": 1.03125, + "loss_num": 0.08642578125, + "loss_xval": 2.5, + "num_input_tokens_seen": 16422168, + "step": 130 + }, + { + "epoch": 0.03360482267684217, + "grad_norm": 25.902233123779297, + "learning_rate": 5e-06, + "loss": 2.5929, + "num_input_tokens_seen": 16549136, + "step": 131 + }, + { + "epoch": 0.03360482267684217, + "loss": 2.5972297191619873, + "loss_ce": 0.01129220798611641, + "loss_iou": 1.0625, + "loss_num": 0.09375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 16549136, + "step": 131 + }, + { + "epoch": 0.03386134804078753, + "grad_norm": 41.642578125, + "learning_rate": 5e-06, + "loss": 2.4405, + "num_input_tokens_seen": 16675308, + "step": 132 + }, + { + "epoch": 0.03386134804078753, + "loss": 2.3397598266601562, + "loss_ce": 0.009681720286607742, + "loss_iou": 0.984375, + "loss_num": 0.07177734375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 16675308, + "step": 132 + }, + { + "epoch": 0.03411787340473289, + "grad_norm": 55.758872985839844, + "learning_rate": 5e-06, + "loss": 2.5283, + "num_input_tokens_seen": 16801708, + "step": 133 + }, + { + "epoch": 0.03411787340473289, + "loss": 2.5442214012145996, + "loss_ce": 0.007112029939889908, + "loss_iou": 1.078125, + "loss_num": 0.07763671875, + "loss_xval": 2.53125, + "num_input_tokens_seen": 16801708, + "step": 133 + }, + { + "epoch": 0.03437439876867825, + "grad_norm": 52.744384765625, + "learning_rate": 5e-06, + "loss": 2.6432, + "num_input_tokens_seen": 16929124, + "step": 134 + }, + { + "epoch": 0.03437439876867825, + "loss": 2.6460366249084473, + "loss_ce": 0.005411533638834953, + "loss_iou": 1.0703125, + "loss_num": 0.0986328125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 16929124, + "step": 134 + }, + { + "epoch": 0.034630924132623614, + "grad_norm": 63.592830657958984, + "learning_rate": 5e-06, + "loss": 2.4053, + "num_input_tokens_seen": 17055248, + "step": 135 + }, + { + "epoch": 0.034630924132623614, + "loss": 2.4392905235290527, + "loss_ce": 0.007649864535778761, + "loss_iou": 1.0234375, + "loss_num": 0.0771484375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 17055248, + "step": 135 + }, + { + "epoch": 0.034887449496568974, + "grad_norm": 43.54157257080078, + "learning_rate": 5e-06, + "loss": 2.7086, + "num_input_tokens_seen": 17182404, + "step": 136 + }, + { + "epoch": 0.034887449496568974, + "loss": 2.6888020038604736, + "loss_ce": 0.005208211950957775, + "loss_iou": 1.0859375, + "loss_num": 0.10205078125, + "loss_xval": 2.6875, + "num_input_tokens_seen": 17182404, + "step": 136 + }, + { + "epoch": 0.035143974860514335, + "grad_norm": 43.190006256103516, + "learning_rate": 5e-06, + "loss": 2.444, + "num_input_tokens_seen": 17308708, + "step": 137 + }, + { + "epoch": 0.035143974860514335, + "loss": 2.3309106826782227, + "loss_ce": 0.006691889371722937, + "loss_iou": 0.98828125, + "loss_num": 0.0703125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 17308708, + "step": 137 + }, + { + "epoch": 0.035400500224459695, + "grad_norm": 37.575782775878906, + "learning_rate": 5e-06, + "loss": 2.4069, + "num_input_tokens_seen": 17435260, + "step": 138 + }, + { + "epoch": 0.035400500224459695, + "loss": 2.315492630004883, + "loss_ce": 0.002992505207657814, + "loss_iou": 0.99609375, + "loss_num": 0.064453125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 17435260, + "step": 138 + }, + { + "epoch": 0.035657025588405056, + "grad_norm": 55.08070373535156, + "learning_rate": 5e-06, + "loss": 2.4393, + "num_input_tokens_seen": 17561664, + "step": 139 + }, + { + "epoch": 0.035657025588405056, + "loss": 2.3562490940093994, + "loss_ce": 0.002733518835157156, + "loss_iou": 1.0078125, + "loss_num": 0.06787109375, + "loss_xval": 2.359375, + "num_input_tokens_seen": 17561664, + "step": 139 + }, + { + "epoch": 0.035913550952350416, + "grad_norm": 46.25513458251953, + "learning_rate": 5e-06, + "loss": 2.5279, + "num_input_tokens_seen": 17687720, + "step": 140 + }, + { + "epoch": 0.035913550952350416, + "loss": 2.656062126159668, + "loss_ce": 0.007624673657119274, + "loss_iou": 1.1171875, + "loss_num": 0.083984375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 17687720, + "step": 140 + }, + { + "epoch": 0.03617007631629578, + "grad_norm": 24.867122650146484, + "learning_rate": 5e-06, + "loss": 2.3909, + "num_input_tokens_seen": 17815004, + "step": 141 + }, + { + "epoch": 0.03617007631629578, + "loss": 2.4053077697753906, + "loss_ce": 0.00491719413548708, + "loss_iou": 0.984375, + "loss_num": 0.0869140625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 17815004, + "step": 141 + }, + { + "epoch": 0.03642660168024114, + "grad_norm": 56.22255325317383, + "learning_rate": 5e-06, + "loss": 2.4203, + "num_input_tokens_seen": 17941124, + "step": 142 + }, + { + "epoch": 0.03642660168024114, + "loss": 2.6103010177612305, + "loss_ce": 0.0067855073139071465, + "loss_iou": 1.109375, + "loss_num": 0.07763671875, + "loss_xval": 2.609375, + "num_input_tokens_seen": 17941124, + "step": 142 + }, + { + "epoch": 0.03668312704418649, + "grad_norm": 45.10807418823242, + "learning_rate": 5e-06, + "loss": 2.681, + "num_input_tokens_seen": 18067868, + "step": 143 + }, + { + "epoch": 0.03668312704418649, + "loss": 2.650552272796631, + "loss_ce": 0.007973975501954556, + "loss_iou": 1.03125, + "loss_num": 0.11376953125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 18067868, + "step": 143 + }, + { + "epoch": 0.03693965240813185, + "grad_norm": 36.432220458984375, + "learning_rate": 5e-06, + "loss": 2.5193, + "num_input_tokens_seen": 18194136, + "step": 144 + }, + { + "epoch": 0.03693965240813185, + "loss": 2.5107741355895996, + "loss_ce": 0.0049149044789373875, + "loss_iou": 1.0234375, + "loss_num": 0.091796875, + "loss_xval": 2.5, + "num_input_tokens_seen": 18194136, + "step": 144 + }, + { + "epoch": 0.03719617777207721, + "grad_norm": 40.214908599853516, + "learning_rate": 5e-06, + "loss": 2.3294, + "num_input_tokens_seen": 18321684, + "step": 145 + }, + { + "epoch": 0.03719617777207721, + "loss": 2.3017754554748535, + "loss_ce": 0.007830099202692509, + "loss_iou": 0.97265625, + "loss_num": 0.06982421875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 18321684, + "step": 145 + }, + { + "epoch": 0.03745270313602257, + "grad_norm": 51.045921325683594, + "learning_rate": 5e-06, + "loss": 2.3483, + "num_input_tokens_seen": 18446856, + "step": 146 + }, + { + "epoch": 0.03745270313602257, + "loss": 2.31803035736084, + "loss_ce": 0.0035773152485489845, + "loss_iou": 0.99609375, + "loss_num": 0.064453125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 18446856, + "step": 146 + }, + { + "epoch": 0.03770922849996793, + "grad_norm": 41.75608444213867, + "learning_rate": 5e-06, + "loss": 2.4412, + "num_input_tokens_seen": 18573100, + "step": 147 + }, + { + "epoch": 0.03770922849996793, + "loss": 2.3477277755737305, + "loss_ce": 0.003977839834988117, + "loss_iou": 1.0078125, + "loss_num": 0.064453125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 18573100, + "step": 147 + }, + { + "epoch": 0.03796575386391329, + "grad_norm": 77.06361389160156, + "learning_rate": 5e-06, + "loss": 2.508, + "num_input_tokens_seen": 18700396, + "step": 148 + }, + { + "epoch": 0.03796575386391329, + "loss": 2.6103341579437256, + "loss_ce": 0.008771702647209167, + "loss_iou": 1.140625, + "loss_num": 0.064453125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 18700396, + "step": 148 + }, + { + "epoch": 0.03822227922785865, + "grad_norm": 41.14444351196289, + "learning_rate": 5e-06, + "loss": 2.7476, + "num_input_tokens_seen": 18826688, + "step": 149 + }, + { + "epoch": 0.03822227922785865, + "loss": 2.814434766769409, + "loss_ce": 0.015606727451086044, + "loss_iou": 1.0703125, + "loss_num": 0.1318359375, + "loss_xval": 2.796875, + "num_input_tokens_seen": 18826688, + "step": 149 + }, + { + "epoch": 0.038478804591804014, + "grad_norm": 35.16160583496094, + "learning_rate": 5e-06, + "loss": 2.7287, + "num_input_tokens_seen": 18952516, + "step": 150 + }, + { + "epoch": 0.038478804591804014, + "loss": 2.635441541671753, + "loss_ce": 0.006535241845995188, + "loss_iou": 1.015625, + "loss_num": 0.11865234375, + "loss_xval": 2.625, + "num_input_tokens_seen": 18952516, + "step": 150 + }, + { + "epoch": 0.038735329955749374, + "grad_norm": 22.285537719726562, + "learning_rate": 5e-06, + "loss": 2.5743, + "num_input_tokens_seen": 19079276, + "step": 151 + }, + { + "epoch": 0.038735329955749374, + "loss": 2.5195555686950684, + "loss_ce": 0.001977392239496112, + "loss_iou": 1.0234375, + "loss_num": 0.09423828125, + "loss_xval": 2.515625, + "num_input_tokens_seen": 19079276, + "step": 151 + }, + { + "epoch": 0.038991855319694735, + "grad_norm": 30.596643447875977, + "learning_rate": 5e-06, + "loss": 2.3908, + "num_input_tokens_seen": 19205692, + "step": 152 + }, + { + "epoch": 0.038991855319694735, + "loss": 2.365237236022949, + "loss_ce": 0.00781540758907795, + "loss_iou": 0.98046875, + "loss_num": 0.0791015625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 19205692, + "step": 152 + }, + { + "epoch": 0.039248380683640095, + "grad_norm": 45.002567291259766, + "learning_rate": 5e-06, + "loss": 2.351, + "num_input_tokens_seen": 19331996, + "step": 153 + }, + { + "epoch": 0.039248380683640095, + "loss": 2.374375581741333, + "loss_ce": 0.0032818051986396313, + "loss_iou": 1.015625, + "loss_num": 0.06787109375, + "loss_xval": 2.375, + "num_input_tokens_seen": 19331996, + "step": 153 + }, + { + "epoch": 0.039504906047585456, + "grad_norm": 26.576692581176758, + "learning_rate": 5e-06, + "loss": 2.4015, + "num_input_tokens_seen": 19458672, + "step": 154 + }, + { + "epoch": 0.039504906047585456, + "loss": 2.4497742652893066, + "loss_ce": 0.003485127817839384, + "loss_iou": 1.0078125, + "loss_num": 0.0849609375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 19458672, + "step": 154 + }, + { + "epoch": 0.039761431411530816, + "grad_norm": 53.44624710083008, + "learning_rate": 5e-06, + "loss": 2.3983, + "num_input_tokens_seen": 19584388, + "step": 155 + }, + { + "epoch": 0.039761431411530816, + "loss": 2.3043508529663086, + "loss_ce": 0.003569577354937792, + "loss_iou": 1.0234375, + "loss_num": 0.049072265625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 19584388, + "step": 155 + }, + { + "epoch": 0.04001795677547618, + "grad_norm": 49.80558395385742, + "learning_rate": 5e-06, + "loss": 2.5676, + "num_input_tokens_seen": 19710324, + "step": 156 + }, + { + "epoch": 0.04001795677547618, + "loss": 2.556002140045166, + "loss_ce": 0.0052209896966814995, + "loss_iou": 1.046875, + "loss_num": 0.09326171875, + "loss_xval": 2.546875, + "num_input_tokens_seen": 19710324, + "step": 156 + }, + { + "epoch": 0.04027448213942154, + "grad_norm": 29.222726821899414, + "learning_rate": 5e-06, + "loss": 2.3567, + "num_input_tokens_seen": 19836852, + "step": 157 + }, + { + "epoch": 0.04027448213942154, + "loss": 2.418010950088501, + "loss_ce": 0.007854770869016647, + "loss_iou": 0.9921875, + "loss_num": 0.08447265625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 19836852, + "step": 157 + }, + { + "epoch": 0.0405310075033669, + "grad_norm": 104.57770538330078, + "learning_rate": 5e-06, + "loss": 2.4729, + "num_input_tokens_seen": 19964468, + "step": 158 + }, + { + "epoch": 0.0405310075033669, + "loss": 2.5433290004730225, + "loss_ce": 0.00426642969250679, + "loss_iou": 1.1328125, + "loss_num": 0.05615234375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 19964468, + "step": 158 + }, + { + "epoch": 0.04078753286731226, + "grad_norm": 35.59853744506836, + "learning_rate": 5e-06, + "loss": 2.7104, + "num_input_tokens_seen": 20091148, + "step": 159 + }, + { + "epoch": 0.04078753286731226, + "loss": 2.811880588531494, + "loss_ce": 0.0032869181595742702, + "loss_iou": 1.0859375, + "loss_num": 0.126953125, + "loss_xval": 2.8125, + "num_input_tokens_seen": 20091148, + "step": 159 + }, + { + "epoch": 0.04104405823125762, + "grad_norm": 27.2836856842041, + "learning_rate": 5e-06, + "loss": 2.6683, + "num_input_tokens_seen": 20217292, + "step": 160 + }, + { + "epoch": 0.04104405823125762, + "loss": 2.718921661376953, + "loss_ce": 0.0079840999096632, + "loss_iou": 1.0546875, + "loss_num": 0.1201171875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 20217292, + "step": 160 + }, + { + "epoch": 0.04130058359520298, + "grad_norm": 24.627878189086914, + "learning_rate": 5e-06, + "loss": 2.531, + "num_input_tokens_seen": 20343688, + "step": 161 + }, + { + "epoch": 0.04130058359520298, + "loss": 2.4754457473754883, + "loss_ce": 0.006695803254842758, + "loss_iou": 0.984375, + "loss_num": 0.09912109375, + "loss_xval": 2.46875, + "num_input_tokens_seen": 20343688, + "step": 161 + }, + { + "epoch": 0.04155710895914833, + "grad_norm": 27.000904083251953, + "learning_rate": 5e-06, + "loss": 2.4639, + "num_input_tokens_seen": 20470492, + "step": 162 + }, + { + "epoch": 0.04155710895914833, + "loss": 2.476573944091797, + "loss_ce": 0.007824004627764225, + "loss_iou": 1.015625, + "loss_num": 0.08740234375, + "loss_xval": 2.46875, + "num_input_tokens_seen": 20470492, + "step": 162 + }, + { + "epoch": 0.04181363432309369, + "grad_norm": 31.578752517700195, + "learning_rate": 5e-06, + "loss": 2.4406, + "num_input_tokens_seen": 20596460, + "step": 163 + }, + { + "epoch": 0.04181363432309369, + "loss": 2.342241048812866, + "loss_ce": 0.0043504973873496056, + "loss_iou": 0.9765625, + "loss_num": 0.0771484375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 20596460, + "step": 163 + }, + { + "epoch": 0.04207015968703905, + "grad_norm": 27.698179244995117, + "learning_rate": 5e-06, + "loss": 2.3545, + "num_input_tokens_seen": 20723124, + "step": 164 + }, + { + "epoch": 0.04207015968703905, + "loss": 2.4864532947540283, + "loss_ce": 0.009890913963317871, + "loss_iou": 1.03125, + "loss_num": 0.08447265625, + "loss_xval": 2.46875, + "num_input_tokens_seen": 20723124, + "step": 164 + }, + { + "epoch": 0.042326685050984414, + "grad_norm": 69.10855865478516, + "learning_rate": 5e-06, + "loss": 2.4006, + "num_input_tokens_seen": 20849976, + "step": 165 + }, + { + "epoch": 0.042326685050984414, + "loss": 2.431546211242676, + "loss_ce": 0.005765097681432962, + "loss_iou": 1.0703125, + "loss_num": 0.05859375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 20849976, + "step": 165 + }, + { + "epoch": 0.042583210414929774, + "grad_norm": 34.0660514831543, + "learning_rate": 5e-06, + "loss": 2.6259, + "num_input_tokens_seen": 20977868, + "step": 166 + }, + { + "epoch": 0.042583210414929774, + "loss": 2.6786484718322754, + "loss_ce": 0.004820517264306545, + "loss_iou": 1.0625, + "loss_num": 0.10791015625, + "loss_xval": 2.671875, + "num_input_tokens_seen": 20977868, + "step": 166 + }, + { + "epoch": 0.042839735778875135, + "grad_norm": 27.81917953491211, + "learning_rate": 5e-06, + "loss": 2.5062, + "num_input_tokens_seen": 21104164, + "step": 167 + }, + { + "epoch": 0.042839735778875135, + "loss": 2.4329450130462646, + "loss_ce": 0.007163737900555134, + "loss_iou": 0.98828125, + "loss_num": 0.08984375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 21104164, + "step": 167 + }, + { + "epoch": 0.043096261142820495, + "grad_norm": 42.72616195678711, + "learning_rate": 5e-06, + "loss": 2.4879, + "num_input_tokens_seen": 21230136, + "step": 168 + }, + { + "epoch": 0.043096261142820495, + "loss": 2.4784064292907715, + "loss_ce": 0.003797078737989068, + "loss_iou": 1.0078125, + "loss_num": 0.0908203125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 21230136, + "step": 168 + }, + { + "epoch": 0.043352786506765856, + "grad_norm": 29.40851593017578, + "learning_rate": 5e-06, + "loss": 2.3245, + "num_input_tokens_seen": 21356484, + "step": 169 + }, + { + "epoch": 0.043352786506765856, + "loss": 2.3569984436035156, + "loss_ce": 0.003482839558273554, + "loss_iou": 0.9921875, + "loss_num": 0.0732421875, + "loss_xval": 2.359375, + "num_input_tokens_seen": 21356484, + "step": 169 + }, + { + "epoch": 0.043609311870711216, + "grad_norm": 49.04379653930664, + "learning_rate": 5e-06, + "loss": 2.4124, + "num_input_tokens_seen": 21482376, + "step": 170 + }, + { + "epoch": 0.043609311870711216, + "loss": 2.2562036514282227, + "loss_ce": 0.002297324826940894, + "loss_iou": 0.9921875, + "loss_num": 0.053955078125, + "loss_xval": 2.25, + "num_input_tokens_seen": 21482376, + "step": 170 + }, + { + "epoch": 0.04386583723465658, + "grad_norm": 44.504146575927734, + "learning_rate": 5e-06, + "loss": 2.4539, + "num_input_tokens_seen": 21608092, + "step": 171 + }, + { + "epoch": 0.04386583723465658, + "loss": 2.4906818866729736, + "loss_ce": 0.003377090208232403, + "loss_iou": 1.0546875, + "loss_num": 0.0751953125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 21608092, + "step": 171 + }, + { + "epoch": 0.04412236259860194, + "grad_norm": 37.75178909301758, + "learning_rate": 5e-06, + "loss": 2.4313, + "num_input_tokens_seen": 21734660, + "step": 172 + }, + { + "epoch": 0.04412236259860194, + "loss": 2.3544721603393555, + "loss_ce": 0.004862620495259762, + "loss_iou": 1.0, + "loss_num": 0.068359375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 21734660, + "step": 172 + }, + { + "epoch": 0.0443788879625473, + "grad_norm": 65.47736358642578, + "learning_rate": 5e-06, + "loss": 2.3955, + "num_input_tokens_seen": 21861688, + "step": 173 + }, + { + "epoch": 0.0443788879625473, + "loss": 2.2382354736328125, + "loss_ce": 0.001907255849801004, + "loss_iou": 1.0078125, + "loss_num": 0.045166015625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 21861688, + "step": 173 + }, + { + "epoch": 0.04463541332649266, + "grad_norm": 34.77819061279297, + "learning_rate": 5e-06, + "loss": 2.6578, + "num_input_tokens_seen": 21988696, + "step": 174 + }, + { + "epoch": 0.04463541332649266, + "loss": 2.6666500568389893, + "loss_ce": 0.006493799388408661, + "loss_iou": 1.0625, + "loss_num": 0.10693359375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 21988696, + "step": 174 + }, + { + "epoch": 0.04489193869043802, + "grad_norm": 28.620773315429688, + "learning_rate": 5e-06, + "loss": 2.492, + "num_input_tokens_seen": 22113460, + "step": 175 + }, + { + "epoch": 0.04489193869043802, + "loss": 2.4219255447387695, + "loss_ce": 0.007862861268222332, + "loss_iou": 0.984375, + "loss_num": 0.08984375, + "loss_xval": 2.40625, + "num_input_tokens_seen": 22113460, + "step": 175 + }, + { + "epoch": 0.04514846405438338, + "grad_norm": 26.363733291625977, + "learning_rate": 5e-06, + "loss": 2.2947, + "num_input_tokens_seen": 22239964, + "step": 176 + }, + { + "epoch": 0.04514846405438338, + "loss": 2.2069625854492188, + "loss_ce": 0.006767282262444496, + "loss_iou": 0.9296875, + "loss_num": 0.0673828125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 22239964, + "step": 176 + }, + { + "epoch": 0.04540498941832874, + "grad_norm": 74.30191040039062, + "learning_rate": 5e-06, + "loss": 2.3601, + "num_input_tokens_seen": 22366812, + "step": 177 + }, + { + "epoch": 0.04540498941832874, + "loss": 2.301196575164795, + "loss_ce": 0.0023684795014560223, + "loss_iou": 1.03125, + "loss_num": 0.046630859375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 22366812, + "step": 177 + }, + { + "epoch": 0.0456615147822741, + "grad_norm": 27.58796501159668, + "learning_rate": 5e-06, + "loss": 2.5907, + "num_input_tokens_seen": 22491376, + "step": 178 + }, + { + "epoch": 0.0456615147822741, + "loss": 2.649542808532715, + "loss_ce": 0.0108711002394557, + "loss_iou": 1.0390625, + "loss_num": 0.11279296875, + "loss_xval": 2.640625, + "num_input_tokens_seen": 22491376, + "step": 178 + }, + { + "epoch": 0.04591804014621946, + "grad_norm": 24.595365524291992, + "learning_rate": 5e-06, + "loss": 2.5772, + "num_input_tokens_seen": 22617448, + "step": 179 + }, + { + "epoch": 0.04591804014621946, + "loss": 2.5798187255859375, + "loss_ce": 0.0016937287291511893, + "loss_iou": 1.0234375, + "loss_num": 0.10595703125, + "loss_xval": 2.578125, + "num_input_tokens_seen": 22617448, + "step": 179 + }, + { + "epoch": 0.04617456551016482, + "grad_norm": 17.898094177246094, + "learning_rate": 5e-06, + "loss": 2.4786, + "num_input_tokens_seen": 22743636, + "step": 180 + }, + { + "epoch": 0.04617456551016482, + "loss": 2.521125316619873, + "loss_ce": 0.0035472132731229067, + "loss_iou": 1.0234375, + "loss_num": 0.09423828125, + "loss_xval": 2.515625, + "num_input_tokens_seen": 22743636, + "step": 180 + }, + { + "epoch": 0.046431090874110174, + "grad_norm": 23.574811935424805, + "learning_rate": 5e-06, + "loss": 2.2385, + "num_input_tokens_seen": 22871184, + "step": 181 + }, + { + "epoch": 0.046431090874110174, + "loss": 2.221139430999756, + "loss_ce": 0.0023894598707556725, + "loss_iou": 0.9609375, + "loss_num": 0.06005859375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 22871184, + "step": 181 + }, + { + "epoch": 0.046687616238055535, + "grad_norm": 82.54412078857422, + "learning_rate": 5e-06, + "loss": 2.3534, + "num_input_tokens_seen": 22997352, + "step": 182 + }, + { + "epoch": 0.046687616238055535, + "loss": 2.332087755203247, + "loss_ce": 0.0020096718799322844, + "loss_iou": 1.0390625, + "loss_num": 0.05126953125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 22997352, + "step": 182 + }, + { + "epoch": 0.046944141602000895, + "grad_norm": 33.71228790283203, + "learning_rate": 5e-06, + "loss": 2.4805, + "num_input_tokens_seen": 23123184, + "step": 183 + }, + { + "epoch": 0.046944141602000895, + "loss": 2.445564031600952, + "loss_ce": 0.008064089342951775, + "loss_iou": 1.0078125, + "loss_num": 0.08349609375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 23123184, + "step": 183 + }, + { + "epoch": 0.047200666965946256, + "grad_norm": 24.10027313232422, + "learning_rate": 5e-06, + "loss": 2.3772, + "num_input_tokens_seen": 23248520, + "step": 184 + }, + { + "epoch": 0.047200666965946256, + "loss": 2.32912540435791, + "loss_ce": 0.00295360223390162, + "loss_iou": 0.984375, + "loss_num": 0.0712890625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 23248520, + "step": 184 + }, + { + "epoch": 0.047457192329891616, + "grad_norm": 30.000547409057617, + "learning_rate": 5e-06, + "loss": 2.2428, + "num_input_tokens_seen": 23373784, + "step": 185 + }, + { + "epoch": 0.047457192329891616, + "loss": 2.141679525375366, + "loss_ce": 0.0020311579573899508, + "loss_iou": 0.9375, + "loss_num": 0.05322265625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 23373784, + "step": 185 + }, + { + "epoch": 0.04771371769383698, + "grad_norm": 42.21815872192383, + "learning_rate": 5e-06, + "loss": 2.348, + "num_input_tokens_seen": 23500384, + "step": 186 + }, + { + "epoch": 0.04771371769383698, + "loss": 2.349303722381592, + "loss_ce": 0.004577181302011013, + "loss_iou": 1.0234375, + "loss_num": 0.06103515625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 23500384, + "step": 186 + }, + { + "epoch": 0.04797024305778234, + "grad_norm": 49.43733596801758, + "learning_rate": 5e-06, + "loss": 2.49, + "num_input_tokens_seen": 23626212, + "step": 187 + }, + { + "epoch": 0.04797024305778234, + "loss": 2.4142708778381348, + "loss_ce": 0.005091257393360138, + "loss_iou": 1.046875, + "loss_num": 0.064453125, + "loss_xval": 2.40625, + "num_input_tokens_seen": 23626212, + "step": 187 + }, + { + "epoch": 0.0482267684217277, + "grad_norm": 64.23003387451172, + "learning_rate": 5e-06, + "loss": 2.3082, + "num_input_tokens_seen": 23752828, + "step": 188 + }, + { + "epoch": 0.0482267684217277, + "loss": 2.2004072666168213, + "loss_ce": 0.0021651112474501133, + "loss_iou": 0.9765625, + "loss_num": 0.04931640625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 23752828, + "step": 188 + }, + { + "epoch": 0.04848329378567306, + "grad_norm": 51.99241638183594, + "learning_rate": 5e-06, + "loss": 2.4372, + "num_input_tokens_seen": 23878668, + "step": 189 + }, + { + "epoch": 0.04848329378567306, + "loss": 2.38288950920105, + "loss_ce": 0.003983333706855774, + "loss_iou": 1.0234375, + "loss_num": 0.068359375, + "loss_xval": 2.375, + "num_input_tokens_seen": 23878668, + "step": 189 + }, + { + "epoch": 0.04873981914961842, + "grad_norm": 27.47298240661621, + "learning_rate": 5e-06, + "loss": 2.325, + "num_input_tokens_seen": 24005520, + "step": 190 + }, + { + "epoch": 0.04873981914961842, + "loss": 2.2082619667053223, + "loss_ce": 0.0031840167939662933, + "loss_iou": 0.953125, + "loss_num": 0.059814453125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 24005520, + "step": 190 + }, + { + "epoch": 0.04899634451356378, + "grad_norm": 45.163719177246094, + "learning_rate": 5e-06, + "loss": 2.1703, + "num_input_tokens_seen": 24130628, + "step": 191 + }, + { + "epoch": 0.04899634451356378, + "loss": 2.137056827545166, + "loss_ce": 0.0032677161507308483, + "loss_iou": 0.9375, + "loss_num": 0.051513671875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 24130628, + "step": 191 + }, + { + "epoch": 0.04925286987750914, + "grad_norm": 36.66967010498047, + "learning_rate": 5e-06, + "loss": 2.1986, + "num_input_tokens_seen": 24256348, + "step": 192 + }, + { + "epoch": 0.04925286987750914, + "loss": 2.207369804382324, + "loss_ce": 0.009127775207161903, + "loss_iou": 0.92578125, + "loss_num": 0.06884765625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 24256348, + "step": 192 + }, + { + "epoch": 0.0495093952414545, + "grad_norm": 68.89830017089844, + "learning_rate": 5e-06, + "loss": 2.4151, + "num_input_tokens_seen": 24382740, + "step": 193 + }, + { + "epoch": 0.0495093952414545, + "loss": 2.555818796157837, + "loss_ce": 0.005037633702158928, + "loss_iou": 1.125, + "loss_num": 0.06103515625, + "loss_xval": 2.546875, + "num_input_tokens_seen": 24382740, + "step": 193 + }, + { + "epoch": 0.04976592060539986, + "grad_norm": 31.592695236206055, + "learning_rate": 5e-06, + "loss": 2.5983, + "num_input_tokens_seen": 24509012, + "step": 194 + }, + { + "epoch": 0.04976592060539986, + "loss": 2.5728516578674316, + "loss_ce": 0.006445457227528095, + "loss_iou": 1.0390625, + "loss_num": 0.09765625, + "loss_xval": 2.5625, + "num_input_tokens_seen": 24509012, + "step": 194 + }, + { + "epoch": 0.05002244596934522, + "grad_norm": 28.23805809020996, + "learning_rate": 5e-06, + "loss": 2.4319, + "num_input_tokens_seen": 24635532, + "step": 195 + }, + { + "epoch": 0.05002244596934522, + "loss": 2.311415195465088, + "loss_ce": 0.0008684303611516953, + "loss_iou": 0.96484375, + "loss_num": 0.07666015625, + "loss_xval": 2.3125, + "num_input_tokens_seen": 24635532, + "step": 195 + }, + { + "epoch": 0.05027897133329058, + "grad_norm": 27.062164306640625, + "learning_rate": 5e-06, + "loss": 2.3428, + "num_input_tokens_seen": 24761112, + "step": 196 + }, + { + "epoch": 0.05027897133329058, + "loss": 2.257509231567383, + "loss_ce": 0.0036028530448675156, + "loss_iou": 0.96875, + "loss_num": 0.0634765625, + "loss_xval": 2.25, + "num_input_tokens_seen": 24761112, + "step": 196 + }, + { + "epoch": 0.05053549669723594, + "grad_norm": 43.7282829284668, + "learning_rate": 5e-06, + "loss": 2.2594, + "num_input_tokens_seen": 24888080, + "step": 197 + }, + { + "epoch": 0.05053549669723594, + "loss": 2.1552748680114746, + "loss_ce": 0.003907795064151287, + "loss_iou": 0.9296875, + "loss_num": 0.059326171875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 24888080, + "step": 197 + }, + { + "epoch": 0.0507920220611813, + "grad_norm": 43.773223876953125, + "learning_rate": 5e-06, + "loss": 2.3791, + "num_input_tokens_seen": 25015048, + "step": 198 + }, + { + "epoch": 0.0507920220611813, + "loss": 2.2641029357910156, + "loss_ce": 0.007266967091709375, + "loss_iou": 0.984375, + "loss_num": 0.05712890625, + "loss_xval": 2.25, + "num_input_tokens_seen": 25015048, + "step": 198 + }, + { + "epoch": 0.05104854742512666, + "grad_norm": 43.72599411010742, + "learning_rate": 5e-06, + "loss": 2.2514, + "num_input_tokens_seen": 25140724, + "step": 199 + }, + { + "epoch": 0.05104854742512666, + "loss": 2.084597110748291, + "loss_ce": 0.001589204533956945, + "loss_iou": 0.9140625, + "loss_num": 0.051025390625, + "loss_xval": 2.078125, + "num_input_tokens_seen": 25140724, + "step": 199 + }, + { + "epoch": 0.051305072789072016, + "grad_norm": 54.65023422241211, + "learning_rate": 5e-06, + "loss": 2.2309, + "num_input_tokens_seen": 25266656, + "step": 200 + }, + { + "epoch": 0.051305072789072016, + "loss": 2.1164309978485107, + "loss_ce": 0.006079402752220631, + "loss_iou": 0.94921875, + "loss_num": 0.041259765625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 25266656, + "step": 200 + }, + { + "epoch": 0.05156159815301738, + "grad_norm": 46.44731903076172, + "learning_rate": 5e-06, + "loss": 2.4071, + "num_input_tokens_seen": 25392340, + "step": 201 + }, + { + "epoch": 0.05156159815301738, + "loss": 2.3556690216064453, + "loss_ce": 0.004106595646589994, + "loss_iou": 0.99609375, + "loss_num": 0.0712890625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 25392340, + "step": 201 + }, + { + "epoch": 0.05181812351696274, + "grad_norm": 30.223947525024414, + "learning_rate": 5e-06, + "loss": 2.2721, + "num_input_tokens_seen": 25519364, + "step": 202 + }, + { + "epoch": 0.05181812351696274, + "loss": 2.3477423191070557, + "loss_ce": 0.00789845734834671, + "loss_iou": 1.0, + "loss_num": 0.06787109375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 25519364, + "step": 202 + }, + { + "epoch": 0.0520746488809081, + "grad_norm": 37.88806915283203, + "learning_rate": 5e-06, + "loss": 2.1818, + "num_input_tokens_seen": 25645008, + "step": 203 + }, + { + "epoch": 0.0520746488809081, + "loss": 2.2638063430786133, + "loss_ce": 0.008923721499741077, + "loss_iou": 0.9921875, + "loss_num": 0.054443359375, + "loss_xval": 2.25, + "num_input_tokens_seen": 25645008, + "step": 203 + }, + { + "epoch": 0.05233117424485346, + "grad_norm": 55.52022171020508, + "learning_rate": 5e-06, + "loss": 2.2548, + "num_input_tokens_seen": 25773172, + "step": 204 + }, + { + "epoch": 0.05233117424485346, + "loss": 2.2850098609924316, + "loss_ce": 0.0057130418717861176, + "loss_iou": 1.0, + "loss_num": 0.056396484375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 25773172, + "step": 204 + }, + { + "epoch": 0.05258769960879882, + "grad_norm": 43.69260025024414, + "learning_rate": 5e-06, + "loss": 2.4098, + "num_input_tokens_seen": 25899220, + "step": 205 + }, + { + "epoch": 0.05258769960879882, + "loss": 2.3387224674224854, + "loss_ce": 0.0027849748730659485, + "loss_iou": 1.0078125, + "loss_num": 0.064453125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 25899220, + "step": 205 + }, + { + "epoch": 0.05284422497274418, + "grad_norm": 49.544219970703125, + "learning_rate": 5e-06, + "loss": 2.3777, + "num_input_tokens_seen": 26026028, + "step": 206 + }, + { + "epoch": 0.05284422497274418, + "loss": 2.5157861709594727, + "loss_ce": 0.00406761234626174, + "loss_iou": 1.0625, + "loss_num": 0.07666015625, + "loss_xval": 2.515625, + "num_input_tokens_seen": 26026028, + "step": 206 + }, + { + "epoch": 0.05310075033668954, + "grad_norm": 36.33229446411133, + "learning_rate": 5e-06, + "loss": 2.4065, + "num_input_tokens_seen": 26153032, + "step": 207 + }, + { + "epoch": 0.05310075033668954, + "loss": 2.347304344177246, + "loss_ce": 0.003554383059963584, + "loss_iou": 0.984375, + "loss_num": 0.0751953125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 26153032, + "step": 207 + }, + { + "epoch": 0.0533572757006349, + "grad_norm": 40.57438278198242, + "learning_rate": 5e-06, + "loss": 2.1256, + "num_input_tokens_seen": 26279780, + "step": 208 + }, + { + "epoch": 0.0533572757006349, + "loss": 2.0592567920684814, + "loss_ce": 0.006522357929497957, + "loss_iou": 0.89453125, + "loss_num": 0.052734375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 26279780, + "step": 208 + }, + { + "epoch": 0.05361380106458026, + "grad_norm": 37.91270065307617, + "learning_rate": 5e-06, + "loss": 2.109, + "num_input_tokens_seen": 26406588, + "step": 209 + }, + { + "epoch": 0.05361380106458026, + "loss": 2.1153314113616943, + "loss_ce": 0.0020501683466136456, + "loss_iou": 0.93359375, + "loss_num": 0.0498046875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 26406588, + "step": 209 + }, + { + "epoch": 0.05387032642852562, + "grad_norm": 54.453285217285156, + "learning_rate": 5e-06, + "loss": 2.1622, + "num_input_tokens_seen": 26532224, + "step": 210 + }, + { + "epoch": 0.05387032642852562, + "loss": 2.2607522010803223, + "loss_ce": 0.0039160954765975475, + "loss_iou": 0.9921875, + "loss_num": 0.05419921875, + "loss_xval": 2.25, + "num_input_tokens_seen": 26532224, + "step": 210 + }, + { + "epoch": 0.05412685179247098, + "grad_norm": 69.34442138671875, + "learning_rate": 5e-06, + "loss": 2.2691, + "num_input_tokens_seen": 26658672, + "step": 211 + }, + { + "epoch": 0.05412685179247098, + "loss": 2.2887065410614014, + "loss_ce": 0.0035504265688359737, + "loss_iou": 1.0234375, + "loss_num": 0.046630859375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 26658672, + "step": 211 + }, + { + "epoch": 0.05438337715641634, + "grad_norm": 29.19058609008789, + "learning_rate": 5e-06, + "loss": 2.3397, + "num_input_tokens_seen": 26786108, + "step": 212 + }, + { + "epoch": 0.05438337715641634, + "loss": 2.410947799682617, + "loss_ce": 0.006651013158261776, + "loss_iou": 1.0078125, + "loss_num": 0.076171875, + "loss_xval": 2.40625, + "num_input_tokens_seen": 26786108, + "step": 212 + }, + { + "epoch": 0.0546399025203617, + "grad_norm": 71.75, + "learning_rate": 5e-06, + "loss": 2.3114, + "num_input_tokens_seen": 26912496, + "step": 213 + }, + { + "epoch": 0.0546399025203617, + "loss": 2.317553997039795, + "loss_ce": 0.0031007302459329367, + "loss_iou": 1.0078125, + "loss_num": 0.06005859375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 26912496, + "step": 213 + }, + { + "epoch": 0.05489642788430706, + "grad_norm": 33.20786666870117, + "learning_rate": 5e-06, + "loss": 2.4252, + "num_input_tokens_seen": 27039116, + "step": 214 + }, + { + "epoch": 0.05489642788430706, + "loss": 2.3874313831329346, + "loss_ce": 0.004618733190000057, + "loss_iou": 1.0078125, + "loss_num": 0.072265625, + "loss_xval": 2.375, + "num_input_tokens_seen": 27039116, + "step": 214 + }, + { + "epoch": 0.05515295324825242, + "grad_norm": 33.257835388183594, + "learning_rate": 5e-06, + "loss": 2.1667, + "num_input_tokens_seen": 27164500, + "step": 215 + }, + { + "epoch": 0.05515295324825242, + "loss": 2.2310214042663574, + "loss_ce": 0.0034822672605514526, + "loss_iou": 0.9609375, + "loss_num": 0.06103515625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 27164500, + "step": 215 + }, + { + "epoch": 0.055409478612197784, + "grad_norm": 47.59571075439453, + "learning_rate": 5e-06, + "loss": 2.2111, + "num_input_tokens_seen": 27290612, + "step": 216 + }, + { + "epoch": 0.055409478612197784, + "loss": 2.1596269607543945, + "loss_ce": 0.004353295546025038, + "loss_iou": 0.94921875, + "loss_num": 0.05078125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 27290612, + "step": 216 + }, + { + "epoch": 0.055666003976143144, + "grad_norm": 83.97514343261719, + "learning_rate": 5e-06, + "loss": 2.5174, + "num_input_tokens_seen": 27417140, + "step": 217 + }, + { + "epoch": 0.055666003976143144, + "loss": 2.6816954612731934, + "loss_ce": 0.003960899077355862, + "loss_iou": 1.234375, + "loss_num": 0.043212890625, + "loss_xval": 2.671875, + "num_input_tokens_seen": 27417140, + "step": 217 + }, + { + "epoch": 0.055922529340088505, + "grad_norm": 28.211091995239258, + "learning_rate": 5e-06, + "loss": 2.517, + "num_input_tokens_seen": 27543880, + "step": 218 + }, + { + "epoch": 0.055922529340088505, + "loss": 2.7044153213500977, + "loss_ce": 0.00714997248724103, + "loss_iou": 1.078125, + "loss_num": 0.107421875, + "loss_xval": 2.703125, + "num_input_tokens_seen": 27543880, + "step": 218 + }, + { + "epoch": 0.05617905470403386, + "grad_norm": 22.145076751708984, + "learning_rate": 5e-06, + "loss": 2.4415, + "num_input_tokens_seen": 27669900, + "step": 219 + }, + { + "epoch": 0.05617905470403386, + "loss": 2.4448721408843994, + "loss_ce": 0.007372183725237846, + "loss_iou": 0.9921875, + "loss_num": 0.0908203125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 27669900, + "step": 219 + }, + { + "epoch": 0.05643558006797922, + "grad_norm": 23.535663604736328, + "learning_rate": 5e-06, + "loss": 2.3391, + "num_input_tokens_seen": 27795928, + "step": 220 + }, + { + "epoch": 0.05643558006797922, + "loss": 2.42863130569458, + "loss_ce": 0.0048032496124506, + "loss_iou": 0.9921875, + "loss_num": 0.087890625, + "loss_xval": 2.421875, + "num_input_tokens_seen": 27795928, + "step": 220 + }, + { + "epoch": 0.05669210543192458, + "grad_norm": 24.6494197845459, + "learning_rate": 5e-06, + "loss": 2.1946, + "num_input_tokens_seen": 27922684, + "step": 221 + }, + { + "epoch": 0.05669210543192458, + "loss": 2.306547164916992, + "loss_ce": 0.006742406636476517, + "loss_iou": 0.96875, + "loss_num": 0.07275390625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 27922684, + "step": 221 + }, + { + "epoch": 0.05694863079586994, + "grad_norm": 18.592491149902344, + "learning_rate": 5e-06, + "loss": 2.0691, + "num_input_tokens_seen": 28048640, + "step": 222 + }, + { + "epoch": 0.05694863079586994, + "loss": 2.035184383392334, + "loss_ce": 0.003934322390705347, + "loss_iou": 0.8984375, + "loss_num": 0.046875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 28048640, + "step": 222 + }, + { + "epoch": 0.0572051561598153, + "grad_norm": 68.20878601074219, + "learning_rate": 5e-06, + "loss": 2.0987, + "num_input_tokens_seen": 28174600, + "step": 223 + }, + { + "epoch": 0.0572051561598153, + "loss": 1.956531286239624, + "loss_ce": 0.0024296902120113373, + "loss_iou": 0.88671875, + "loss_num": 0.03564453125, + "loss_xval": 1.953125, + "num_input_tokens_seen": 28174600, + "step": 223 + }, + { + "epoch": 0.05746168152376066, + "grad_norm": 37.80804443359375, + "learning_rate": 5e-06, + "loss": 2.3448, + "num_input_tokens_seen": 28300520, + "step": 224 + }, + { + "epoch": 0.05746168152376066, + "loss": 2.289793014526367, + "loss_ce": 0.006589930504560471, + "loss_iou": 0.98828125, + "loss_num": 0.0625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 28300520, + "step": 224 + }, + { + "epoch": 0.05771820688770602, + "grad_norm": 29.906721115112305, + "learning_rate": 5e-06, + "loss": 2.1799, + "num_input_tokens_seen": 28427104, + "step": 225 + }, + { + "epoch": 0.05771820688770602, + "loss": 2.266775369644165, + "loss_ce": 0.0021270280703902245, + "loss_iou": 0.984375, + "loss_num": 0.05908203125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 28427104, + "step": 225 + }, + { + "epoch": 0.05797473225165138, + "grad_norm": 64.59516143798828, + "learning_rate": 5e-06, + "loss": 2.2515, + "num_input_tokens_seen": 28553072, + "step": 226 + }, + { + "epoch": 0.05797473225165138, + "loss": 2.2763028144836426, + "loss_ce": 0.003841772209852934, + "loss_iou": 1.015625, + "loss_num": 0.047119140625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 28553072, + "step": 226 + }, + { + "epoch": 0.05823125761559674, + "grad_norm": 30.437808990478516, + "learning_rate": 5e-06, + "loss": 2.5057, + "num_input_tokens_seen": 28679832, + "step": 227 + }, + { + "epoch": 0.05823125761559674, + "loss": 2.5206711292266846, + "loss_ce": 0.0030930291395634413, + "loss_iou": 1.0546875, + "loss_num": 0.08154296875, + "loss_xval": 2.515625, + "num_input_tokens_seen": 28679832, + "step": 227 + }, + { + "epoch": 0.0584877829795421, + "grad_norm": 30.77609634399414, + "learning_rate": 5e-06, + "loss": 2.4246, + "num_input_tokens_seen": 28805772, + "step": 228 + }, + { + "epoch": 0.0584877829795421, + "loss": 2.3782191276550293, + "loss_ce": 0.005172381177544594, + "loss_iou": 1.0078125, + "loss_num": 0.0712890625, + "loss_xval": 2.375, + "num_input_tokens_seen": 28805772, + "step": 228 + }, + { + "epoch": 0.05874430834348746, + "grad_norm": 53.74028396606445, + "learning_rate": 5e-06, + "loss": 2.097, + "num_input_tokens_seen": 28932780, + "step": 229 + }, + { + "epoch": 0.05874430834348746, + "loss": 1.9517161846160889, + "loss_ce": 0.0015208933036774397, + "loss_iou": 0.859375, + "loss_num": 0.04541015625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 28932780, + "step": 229 + }, + { + "epoch": 0.05900083370743282, + "grad_norm": 32.93329620361328, + "learning_rate": 5e-06, + "loss": 2.1493, + "num_input_tokens_seen": 29059376, + "step": 230 + }, + { + "epoch": 0.05900083370743282, + "loss": 1.9703216552734375, + "loss_ce": 0.002548185409978032, + "loss_iou": 0.87109375, + "loss_num": 0.044921875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 29059376, + "step": 230 + }, + { + "epoch": 0.059257359071378184, + "grad_norm": 50.18242263793945, + "learning_rate": 5e-06, + "loss": 2.1527, + "num_input_tokens_seen": 29184964, + "step": 231 + }, + { + "epoch": 0.059257359071378184, + "loss": 2.104865550994873, + "loss_ce": 0.0023265578784048557, + "loss_iou": 0.95703125, + "loss_num": 0.037109375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 29184964, + "step": 231 + }, + { + "epoch": 0.059513884435323544, + "grad_norm": 38.76399230957031, + "learning_rate": 5e-06, + "loss": 2.2529, + "num_input_tokens_seen": 29311644, + "step": 232 + }, + { + "epoch": 0.059513884435323544, + "loss": 2.252469539642334, + "loss_ce": 0.006375922821462154, + "loss_iou": 0.9765625, + "loss_num": 0.05810546875, + "loss_xval": 2.25, + "num_input_tokens_seen": 29311644, + "step": 232 + }, + { + "epoch": 0.059770409799268905, + "grad_norm": 33.5998649597168, + "learning_rate": 5e-06, + "loss": 2.0749, + "num_input_tokens_seen": 29436516, + "step": 233 + }, + { + "epoch": 0.059770409799268905, + "loss": 2.130073070526123, + "loss_ce": 0.003119700588285923, + "loss_iou": 0.94140625, + "loss_num": 0.0478515625, + "loss_xval": 2.125, + "num_input_tokens_seen": 29436516, + "step": 233 + }, + { + "epoch": 0.060026935163214265, + "grad_norm": 46.88568878173828, + "learning_rate": 5e-06, + "loss": 2.151, + "num_input_tokens_seen": 29561748, + "step": 234 + }, + { + "epoch": 0.060026935163214265, + "loss": 2.2594504356384277, + "loss_ce": 0.0016379462322220206, + "loss_iou": 1.0078125, + "loss_num": 0.048583984375, + "loss_xval": 2.25, + "num_input_tokens_seen": 29561748, + "step": 234 + }, + { + "epoch": 0.060283460527159625, + "grad_norm": 59.014984130859375, + "learning_rate": 5e-06, + "loss": 2.3208, + "num_input_tokens_seen": 29688228, + "step": 235 + }, + { + "epoch": 0.060283460527159625, + "loss": 2.3178904056549072, + "loss_ce": 0.003437336999922991, + "loss_iou": 1.046875, + "loss_num": 0.046142578125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 29688228, + "step": 235 + }, + { + "epoch": 0.060539985891104986, + "grad_norm": 76.26374816894531, + "learning_rate": 5e-06, + "loss": 2.3334, + "num_input_tokens_seen": 29814532, + "step": 236 + }, + { + "epoch": 0.060539985891104986, + "loss": 2.3111815452575684, + "loss_ce": 0.006494143046438694, + "loss_iou": 1.015625, + "loss_num": 0.054931640625, + "loss_xval": 2.3125, + "num_input_tokens_seen": 29814532, + "step": 236 + }, + { + "epoch": 0.060796511255050346, + "grad_norm": 37.97163772583008, + "learning_rate": 5e-06, + "loss": 2.377, + "num_input_tokens_seen": 29940988, + "step": 237 + }, + { + "epoch": 0.060796511255050346, + "loss": 2.4005744457244873, + "loss_ce": 0.0021370230242609978, + "loss_iou": 1.0, + "loss_num": 0.078125, + "loss_xval": 2.40625, + "num_input_tokens_seen": 29940988, + "step": 237 + }, + { + "epoch": 0.0610530366189957, + "grad_norm": 31.23127555847168, + "learning_rate": 5e-06, + "loss": 2.1217, + "num_input_tokens_seen": 30068792, + "step": 238 + }, + { + "epoch": 0.0610530366189957, + "loss": 2.208750009536743, + "loss_ce": 0.004648377187550068, + "loss_iou": 0.953125, + "loss_num": 0.060791015625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 30068792, + "step": 238 + }, + { + "epoch": 0.06130956198294106, + "grad_norm": 52.3553466796875, + "learning_rate": 5e-06, + "loss": 2.0786, + "num_input_tokens_seen": 30195912, + "step": 239 + }, + { + "epoch": 0.06130956198294106, + "loss": 1.959381103515625, + "loss_ce": 0.0033263680525124073, + "loss_iou": 0.890625, + "loss_num": 0.035888671875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 30195912, + "step": 239 + }, + { + "epoch": 0.06156608734688642, + "grad_norm": 37.35506820678711, + "learning_rate": 5e-06, + "loss": 2.3752, + "num_input_tokens_seen": 30320128, + "step": 240 + }, + { + "epoch": 0.06156608734688642, + "loss": 2.3584609031677246, + "loss_ce": 0.008851448073983192, + "loss_iou": 1.0078125, + "loss_num": 0.06689453125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 30320128, + "step": 240 + }, + { + "epoch": 0.06182261271083178, + "grad_norm": 23.697324752807617, + "learning_rate": 5e-06, + "loss": 2.1053, + "num_input_tokens_seen": 30445792, + "step": 241 + }, + { + "epoch": 0.06182261271083178, + "loss": 2.070606231689453, + "loss_ce": 0.004200078547000885, + "loss_iou": 0.88671875, + "loss_num": 0.05810546875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 30445792, + "step": 241 + }, + { + "epoch": 0.06207913807477714, + "grad_norm": 54.24428939819336, + "learning_rate": 5e-06, + "loss": 2.075, + "num_input_tokens_seen": 30573256, + "step": 242 + }, + { + "epoch": 0.06207913807477714, + "loss": 1.9848737716674805, + "loss_ce": 0.0024519390426576138, + "loss_iou": 0.89453125, + "loss_num": 0.0380859375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 30573256, + "step": 242 + }, + { + "epoch": 0.0623356634387225, + "grad_norm": 59.34233856201172, + "learning_rate": 5e-06, + "loss": 2.0339, + "num_input_tokens_seen": 30700736, + "step": 243 + }, + { + "epoch": 0.0623356634387225, + "loss": 1.979261875152588, + "loss_ce": 0.0036758403293788433, + "loss_iou": 0.87890625, + "loss_num": 0.043212890625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 30700736, + "step": 243 + }, + { + "epoch": 0.06259218880266787, + "grad_norm": 89.15264129638672, + "learning_rate": 5e-06, + "loss": 2.5587, + "num_input_tokens_seen": 30826556, + "step": 244 + }, + { + "epoch": 0.06259218880266787, + "loss": 2.3324899673461914, + "loss_ce": 0.0033883764408528805, + "loss_iou": 1.078125, + "loss_num": 0.03466796875, + "loss_xval": 2.328125, + "num_input_tokens_seen": 30826556, + "step": 244 + }, + { + "epoch": 0.06284871416661322, + "grad_norm": 28.108736038208008, + "learning_rate": 5e-06, + "loss": 2.4369, + "num_input_tokens_seen": 30952784, + "step": 245 + }, + { + "epoch": 0.06284871416661322, + "loss": 2.3802599906921387, + "loss_ce": 0.0013536261394619942, + "loss_iou": 0.984375, + "loss_num": 0.08203125, + "loss_xval": 2.375, + "num_input_tokens_seen": 30952784, + "step": 245 + }, + { + "epoch": 0.06310523953055859, + "grad_norm": 22.753265380859375, + "learning_rate": 5e-06, + "loss": 2.2559, + "num_input_tokens_seen": 31078836, + "step": 246 + }, + { + "epoch": 0.06310523953055859, + "loss": 2.110809803009033, + "loss_ce": 0.0014348177937790751, + "loss_iou": 0.88671875, + "loss_num": 0.0673828125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 31078836, + "step": 246 + }, + { + "epoch": 0.06336176489450394, + "grad_norm": 23.44645881652832, + "learning_rate": 5e-06, + "loss": 2.2773, + "num_input_tokens_seen": 31205948, + "step": 247 + }, + { + "epoch": 0.06336176489450394, + "loss": 2.290022373199463, + "loss_ce": 0.003889652667567134, + "loss_iou": 0.96875, + "loss_num": 0.06982421875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 31205948, + "step": 247 + }, + { + "epoch": 0.0636182902584493, + "grad_norm": 20.679468154907227, + "learning_rate": 5e-06, + "loss": 2.128, + "num_input_tokens_seen": 31331520, + "step": 248 + }, + { + "epoch": 0.0636182902584493, + "loss": 2.1520919799804688, + "loss_ce": 0.00463095773011446, + "loss_iou": 0.9140625, + "loss_num": 0.0625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 31331520, + "step": 248 + }, + { + "epoch": 0.06387481562239467, + "grad_norm": 57.016754150390625, + "learning_rate": 5e-06, + "loss": 2.0256, + "num_input_tokens_seen": 31458964, + "step": 249 + }, + { + "epoch": 0.06387481562239467, + "loss": 2.2689638137817383, + "loss_ce": 0.004315140191465616, + "loss_iou": 0.9921875, + "loss_num": 0.0556640625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 31458964, + "step": 249 + }, + { + "epoch": 0.06413134098634002, + "grad_norm": 34.484405517578125, + "learning_rate": 5e-06, + "loss": 2.1371, + "num_input_tokens_seen": 31584912, + "step": 250 + }, + { + "epoch": 0.06413134098634002, + "eval_icons_CIoU": -0.1733112409710884, + "eval_icons_GIoU": -0.1826426312327385, + "eval_icons_IoU": 0.0304591441527009, + "eval_icons_MAE_all": 0.10289280116558075, + "eval_icons_MAE_h": 0.13134828954935074, + "eval_icons_MAE_w": 0.11449958011507988, + "eval_icons_MAE_x_boxes": 0.0841725692152977, + "eval_icons_MAE_y_boxes": 0.09374520927667618, + "eval_icons_NUM_probability": 0.9985655248165131, + "eval_icons_inside_bbox": 0.05902777798473835, + "eval_icons_loss": 2.8885107040405273, + "eval_icons_loss_ce": 0.0029261676827445626, + "eval_icons_loss_iou": 1.205810546875, + "eval_icons_loss_num": 0.10430908203125, + "eval_icons_loss_xval": 2.93359375, + "eval_icons_runtime": 39.8635, + "eval_icons_samples_per_second": 1.254, + "eval_icons_steps_per_second": 0.05, + "num_input_tokens_seen": 31584912, + "step": 250 + }, + { + "epoch": 0.06413134098634002, + "eval_screenspot_CIoU": -0.09415260950724284, + "eval_screenspot_GIoU": -0.16390183195471764, + "eval_screenspot_IoU": 0.12400435407956441, + "eval_screenspot_MAE_all": 0.09230512628952663, + "eval_screenspot_MAE_h": 0.07452671478192012, + "eval_screenspot_MAE_w": 0.16431421538194022, + "eval_screenspot_MAE_x_boxes": 0.13593154648939768, + "eval_screenspot_MAE_y_boxes": 0.060380659997463226, + "eval_screenspot_NUM_probability": 0.9978764653205872, + "eval_screenspot_inside_bbox": 0.2600000003973643, + "eval_screenspot_loss": 2.838578939437866, + "eval_screenspot_loss_ce": 0.013154878125836452, + "eval_screenspot_loss_iou": 1.1829427083333333, + "eval_screenspot_loss_num": 0.09867350260416667, + "eval_screenspot_loss_xval": 2.8587239583333335, + "eval_screenspot_runtime": 68.5629, + "eval_screenspot_samples_per_second": 1.298, + "eval_screenspot_steps_per_second": 0.044, + "num_input_tokens_seen": 31584912, + "step": 250 + }, + { + "epoch": 0.06413134098634002, + "loss": 2.717196464538574, + "loss_ce": 0.004306042101234198, + "loss_iou": 1.1171875, + "loss_num": 0.095703125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 31584912, + "step": 250 + }, + { + "epoch": 0.06438786635028539, + "grad_norm": 49.74070358276367, + "learning_rate": 5e-06, + "loss": 2.2404, + "num_input_tokens_seen": 31711864, + "step": 251 + }, + { + "epoch": 0.06438786635028539, + "loss": 2.3566224575042725, + "loss_ce": 0.005059942603111267, + "loss_iou": 1.0390625, + "loss_num": 0.05419921875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 31711864, + "step": 251 + }, + { + "epoch": 0.06464439171423074, + "grad_norm": 43.546607971191406, + "learning_rate": 5e-06, + "loss": 2.3367, + "num_input_tokens_seen": 31838068, + "step": 252 + }, + { + "epoch": 0.06464439171423074, + "loss": 2.227330446243286, + "loss_ce": 0.002721048891544342, + "loss_iou": 0.9765625, + "loss_num": 0.0546875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 31838068, + "step": 252 + }, + { + "epoch": 0.0649009170781761, + "grad_norm": 51.83170700073242, + "learning_rate": 5e-06, + "loss": 2.1357, + "num_input_tokens_seen": 31965188, + "step": 253 + }, + { + "epoch": 0.0649009170781761, + "loss": 2.151766777038574, + "loss_ce": 0.005282404366880655, + "loss_iou": 0.953125, + "loss_num": 0.048095703125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 31965188, + "step": 253 + }, + { + "epoch": 0.06515744244212146, + "grad_norm": 44.354087829589844, + "learning_rate": 5e-06, + "loss": 2.1423, + "num_input_tokens_seen": 32093384, + "step": 254 + }, + { + "epoch": 0.06515744244212146, + "loss": 2.1908316612243652, + "loss_ce": 0.0033316893968731165, + "loss_iou": 0.96484375, + "loss_num": 0.05224609375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 32093384, + "step": 254 + }, + { + "epoch": 0.06541396780606683, + "grad_norm": 60.01030731201172, + "learning_rate": 5e-06, + "loss": 2.106, + "num_input_tokens_seen": 32220848, + "step": 255 + }, + { + "epoch": 0.06541396780606683, + "loss": 1.972575306892395, + "loss_ce": 0.0018721634987741709, + "loss_iou": 0.890625, + "loss_num": 0.037353515625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 32220848, + "step": 255 + }, + { + "epoch": 0.06567049317001218, + "grad_norm": 71.41215515136719, + "learning_rate": 5e-06, + "loss": 2.1934, + "num_input_tokens_seen": 32347488, + "step": 256 + }, + { + "epoch": 0.06567049317001218, + "loss": 2.2728734016418457, + "loss_ce": 0.001388987060636282, + "loss_iou": 1.046875, + "loss_num": 0.03564453125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 32347488, + "step": 256 + }, + { + "epoch": 0.06592701853395755, + "grad_norm": 40.604068756103516, + "learning_rate": 5e-06, + "loss": 2.3601, + "num_input_tokens_seen": 32475256, + "step": 257 + }, + { + "epoch": 0.06592701853395755, + "loss": 2.2851829528808594, + "loss_ce": 0.003933164291083813, + "loss_iou": 0.98046875, + "loss_num": 0.06396484375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 32475256, + "step": 257 + }, + { + "epoch": 0.0661835438979029, + "grad_norm": 26.89104652404785, + "learning_rate": 5e-06, + "loss": 2.215, + "num_input_tokens_seen": 32602640, + "step": 258 + }, + { + "epoch": 0.0661835438979029, + "loss": 2.274357557296753, + "loss_ce": 0.0009199426858685911, + "loss_iou": 0.9765625, + "loss_num": 0.064453125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 32602640, + "step": 258 + }, + { + "epoch": 0.06644006926184827, + "grad_norm": 19.452545166015625, + "learning_rate": 5e-06, + "loss": 2.0058, + "num_input_tokens_seen": 32728872, + "step": 259 + }, + { + "epoch": 0.06644006926184827, + "loss": 2.014209508895874, + "loss_ce": 0.0024908818304538727, + "loss_iou": 0.890625, + "loss_num": 0.045654296875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 32728872, + "step": 259 + }, + { + "epoch": 0.06669659462579362, + "grad_norm": 35.033939361572266, + "learning_rate": 5e-06, + "loss": 2.0158, + "num_input_tokens_seen": 32855932, + "step": 260 + }, + { + "epoch": 0.06669659462579362, + "loss": 2.0346410274505615, + "loss_ce": 0.004367677960544825, + "loss_iou": 0.91796875, + "loss_num": 0.038818359375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 32855932, + "step": 260 + }, + { + "epoch": 0.06695311998973899, + "grad_norm": 36.546051025390625, + "learning_rate": 5e-06, + "loss": 2.0466, + "num_input_tokens_seen": 32982132, + "step": 261 + }, + { + "epoch": 0.06695311998973899, + "loss": 2.017688751220703, + "loss_ce": 0.00597008503973484, + "loss_iou": 0.8984375, + "loss_num": 0.043212890625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 32982132, + "step": 261 + }, + { + "epoch": 0.06720964535368434, + "grad_norm": 115.64370727539062, + "learning_rate": 5e-06, + "loss": 2.2988, + "num_input_tokens_seen": 33109128, + "step": 262 + }, + { + "epoch": 0.06720964535368434, + "loss": 2.3690614700317383, + "loss_ce": 0.005780306179076433, + "loss_iou": 1.0703125, + "loss_num": 0.04296875, + "loss_xval": 2.359375, + "num_input_tokens_seen": 33109128, + "step": 262 + }, + { + "epoch": 0.06746617071762971, + "grad_norm": 30.0897216796875, + "learning_rate": 5e-06, + "loss": 2.6018, + "num_input_tokens_seen": 33235512, + "step": 263 + }, + { + "epoch": 0.06746617071762971, + "loss": 2.5445873737335205, + "loss_ce": 0.0035717228893190622, + "loss_iou": 1.046875, + "loss_num": 0.08837890625, + "loss_xval": 2.546875, + "num_input_tokens_seen": 33235512, + "step": 263 + }, + { + "epoch": 0.06772269608157507, + "grad_norm": 23.08639144897461, + "learning_rate": 5e-06, + "loss": 2.5253, + "num_input_tokens_seen": 33362000, + "step": 264 + }, + { + "epoch": 0.06772269608157507, + "loss": 2.502375841140747, + "loss_ce": 0.004328930750489235, + "loss_iou": 1.0234375, + "loss_num": 0.08935546875, + "loss_xval": 2.5, + "num_input_tokens_seen": 33362000, + "step": 264 + }, + { + "epoch": 0.06797922144552043, + "grad_norm": 24.348588943481445, + "learning_rate": 5e-06, + "loss": 2.4206, + "num_input_tokens_seen": 33489320, + "step": 265 + }, + { + "epoch": 0.06797922144552043, + "loss": 2.339601993560791, + "loss_ce": 0.0075707342475652695, + "loss_iou": 0.96875, + "loss_num": 0.0791015625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 33489320, + "step": 265 + }, + { + "epoch": 0.06823574680946579, + "grad_norm": 20.285810470581055, + "learning_rate": 5e-06, + "loss": 2.4356, + "num_input_tokens_seen": 33616764, + "step": 266 + }, + { + "epoch": 0.06823574680946579, + "loss": 2.490346908569336, + "loss_ce": 0.005971949547529221, + "loss_iou": 1.0234375, + "loss_num": 0.087890625, + "loss_xval": 2.484375, + "num_input_tokens_seen": 33616764, + "step": 266 + }, + { + "epoch": 0.06849227217341114, + "grad_norm": 17.93982696533203, + "learning_rate": 5e-06, + "loss": 2.2166, + "num_input_tokens_seen": 33743144, + "step": 267 + }, + { + "epoch": 0.06849227217341114, + "loss": 2.276881694793701, + "loss_ce": 0.008326904848217964, + "loss_iou": 0.953125, + "loss_num": 0.072265625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 33743144, + "step": 267 + }, + { + "epoch": 0.0687487975373565, + "grad_norm": 20.401243209838867, + "learning_rate": 5e-06, + "loss": 2.2606, + "num_input_tokens_seen": 33870924, + "step": 268 + }, + { + "epoch": 0.0687487975373565, + "loss": 2.206037998199463, + "loss_ce": 0.004866407718509436, + "loss_iou": 0.94140625, + "loss_num": 0.06396484375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 33870924, + "step": 268 + }, + { + "epoch": 0.06900532290130186, + "grad_norm": 21.680824279785156, + "learning_rate": 5e-06, + "loss": 2.1135, + "num_input_tokens_seen": 33997444, + "step": 269 + }, + { + "epoch": 0.06900532290130186, + "loss": 2.153524160385132, + "loss_ce": 0.0031335398089140654, + "loss_iou": 0.9453125, + "loss_num": 0.052734375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 33997444, + "step": 269 + }, + { + "epoch": 0.06926184826524723, + "grad_norm": 28.608409881591797, + "learning_rate": 5e-06, + "loss": 2.0925, + "num_input_tokens_seen": 34123632, + "step": 270 + }, + { + "epoch": 0.06926184826524723, + "loss": 2.0080747604370117, + "loss_ce": 0.0012387895258143544, + "loss_iou": 0.8984375, + "loss_num": 0.0419921875, + "loss_xval": 2.0, + "num_input_tokens_seen": 34123632, + "step": 270 + }, + { + "epoch": 0.06951837362919258, + "grad_norm": 44.43445587158203, + "learning_rate": 5e-06, + "loss": 2.0432, + "num_input_tokens_seen": 34248936, + "step": 271 + }, + { + "epoch": 0.06951837362919258, + "loss": 1.9678488969802856, + "loss_ce": 0.002028584945946932, + "loss_iou": 0.890625, + "loss_num": 0.03759765625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 34248936, + "step": 271 + }, + { + "epoch": 0.06977489899313795, + "grad_norm": 90.41899108886719, + "learning_rate": 5e-06, + "loss": 2.2524, + "num_input_tokens_seen": 34374844, + "step": 272 + }, + { + "epoch": 0.06977489899313795, + "loss": 2.343357563018799, + "loss_ce": 0.006443414371460676, + "loss_iou": 1.0703125, + "loss_num": 0.039794921875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 34374844, + "step": 272 + }, + { + "epoch": 0.0700314243570833, + "grad_norm": 37.55814743041992, + "learning_rate": 5e-06, + "loss": 2.4445, + "num_input_tokens_seen": 34500380, + "step": 273 + }, + { + "epoch": 0.0700314243570833, + "loss": 2.4703030586242676, + "loss_ce": 0.0015530271921306849, + "loss_iou": 1.03125, + "loss_num": 0.08154296875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 34500380, + "step": 273 + }, + { + "epoch": 0.07028794972102867, + "grad_norm": 22.772979736328125, + "learning_rate": 5e-06, + "loss": 2.3322, + "num_input_tokens_seen": 34626028, + "step": 274 + }, + { + "epoch": 0.07028794972102867, + "loss": 2.4188337326049805, + "loss_ce": 0.004771072417497635, + "loss_iou": 1.0078125, + "loss_num": 0.080078125, + "loss_xval": 2.40625, + "num_input_tokens_seen": 34626028, + "step": 274 + }, + { + "epoch": 0.07054447508497402, + "grad_norm": 24.351337432861328, + "learning_rate": 5e-06, + "loss": 2.1592, + "num_input_tokens_seen": 34751784, + "step": 275 + }, + { + "epoch": 0.07054447508497402, + "loss": 2.281585454940796, + "loss_ce": 0.005218226462602615, + "loss_iou": 0.96484375, + "loss_num": 0.06982421875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 34751784, + "step": 275 + }, + { + "epoch": 0.07080100044891939, + "grad_norm": 31.42517852783203, + "learning_rate": 5e-06, + "loss": 2.006, + "num_input_tokens_seen": 34878380, + "step": 276 + }, + { + "epoch": 0.07080100044891939, + "loss": 2.109785556793213, + "loss_ce": 0.007246410008519888, + "loss_iou": 0.92578125, + "loss_num": 0.04931640625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 34878380, + "step": 276 + }, + { + "epoch": 0.07105752581286474, + "grad_norm": 35.536659240722656, + "learning_rate": 5e-06, + "loss": 1.9957, + "num_input_tokens_seen": 35005032, + "step": 277 + }, + { + "epoch": 0.07105752581286474, + "loss": 1.883124828338623, + "loss_ce": 0.006171601824462414, + "loss_iou": 0.859375, + "loss_num": 0.03173828125, + "loss_xval": 1.875, + "num_input_tokens_seen": 35005032, + "step": 277 + }, + { + "epoch": 0.07131405117681011, + "grad_norm": 64.79315948486328, + "learning_rate": 5e-06, + "loss": 2.1054, + "num_input_tokens_seen": 35130768, + "step": 278 + }, + { + "epoch": 0.07131405117681011, + "loss": 2.1725223064422607, + "loss_ce": 0.0026004298124462366, + "loss_iou": 0.96875, + "loss_num": 0.04638671875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 35130768, + "step": 278 + }, + { + "epoch": 0.07157057654075547, + "grad_norm": 56.64155960083008, + "learning_rate": 5e-06, + "loss": 2.1437, + "num_input_tokens_seen": 35257380, + "step": 279 + }, + { + "epoch": 0.07157057654075547, + "loss": 2.116905927658081, + "loss_ce": 0.0036246790550649166, + "loss_iou": 0.9375, + "loss_num": 0.048095703125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 35257380, + "step": 279 + }, + { + "epoch": 0.07182710190470083, + "grad_norm": 29.063037872314453, + "learning_rate": 5e-06, + "loss": 2.3211, + "num_input_tokens_seen": 35383000, + "step": 280 + }, + { + "epoch": 0.07182710190470083, + "loss": 2.2586913108825684, + "loss_ce": 0.00283189513720572, + "loss_iou": 0.98828125, + "loss_num": 0.056640625, + "loss_xval": 2.25, + "num_input_tokens_seen": 35383000, + "step": 280 + }, + { + "epoch": 0.07208362726864619, + "grad_norm": 34.01350021362305, + "learning_rate": 5e-06, + "loss": 1.9599, + "num_input_tokens_seen": 35510324, + "step": 281 + }, + { + "epoch": 0.07208362726864619, + "loss": 2.0147454738616943, + "loss_ce": 0.0030266791582107544, + "loss_iou": 0.8984375, + "loss_num": 0.0419921875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 35510324, + "step": 281 + }, + { + "epoch": 0.07234015263259155, + "grad_norm": 108.72042846679688, + "learning_rate": 5e-06, + "loss": 2.2946, + "num_input_tokens_seen": 35636940, + "step": 282 + }, + { + "epoch": 0.07234015263259155, + "loss": 2.4681782722473145, + "loss_ce": 0.00821721088141203, + "loss_iou": 1.109375, + "loss_num": 0.046630859375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 35636940, + "step": 282 + }, + { + "epoch": 0.0725966779965369, + "grad_norm": 30.108070373535156, + "learning_rate": 5e-06, + "loss": 2.4316, + "num_input_tokens_seen": 35763996, + "step": 283 + }, + { + "epoch": 0.0725966779965369, + "loss": 2.4666950702667236, + "loss_ce": 0.0038043856620788574, + "loss_iou": 1.03125, + "loss_num": 0.080078125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 35763996, + "step": 283 + }, + { + "epoch": 0.07285320336048227, + "grad_norm": 28.02402687072754, + "learning_rate": 5e-06, + "loss": 2.3357, + "num_input_tokens_seen": 35890428, + "step": 284 + }, + { + "epoch": 0.07285320336048227, + "loss": 2.294663429260254, + "loss_ce": 0.007553888484835625, + "loss_iou": 0.96484375, + "loss_num": 0.07080078125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 35890428, + "step": 284 + }, + { + "epoch": 0.07310972872442763, + "grad_norm": 27.3283748626709, + "learning_rate": 5e-06, + "loss": 2.1291, + "num_input_tokens_seen": 36017888, + "step": 285 + }, + { + "epoch": 0.07310972872442763, + "loss": 2.122551918029785, + "loss_ce": 0.0053645046427845955, + "loss_iou": 0.9140625, + "loss_num": 0.058837890625, + "loss_xval": 2.125, + "num_input_tokens_seen": 36017888, + "step": 285 + }, + { + "epoch": 0.07336625408837298, + "grad_norm": 31.38198471069336, + "learning_rate": 5e-06, + "loss": 2.0225, + "num_input_tokens_seen": 36145740, + "step": 286 + }, + { + "epoch": 0.07336625408837298, + "loss": 1.9516512155532837, + "loss_ce": 0.0034090036060661077, + "loss_iou": 0.8671875, + "loss_num": 0.04248046875, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 36145740, + "step": 286 + }, + { + "epoch": 0.07362277945231835, + "grad_norm": 52.19550704956055, + "learning_rate": 5e-06, + "loss": 1.9859, + "num_input_tokens_seen": 36272304, + "step": 287 + }, + { + "epoch": 0.07362277945231835, + "loss": 2.0019702911376953, + "loss_ce": 0.010759426280856133, + "loss_iou": 0.90625, + "loss_num": 0.0361328125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 36272304, + "step": 287 + }, + { + "epoch": 0.0738793048162637, + "grad_norm": 41.171653747558594, + "learning_rate": 5e-06, + "loss": 2.2903, + "num_input_tokens_seen": 36398548, + "step": 288 + }, + { + "epoch": 0.0738793048162637, + "loss": 2.27291202545166, + "loss_ce": 0.004357603378593922, + "loss_iou": 0.9921875, + "loss_num": 0.05615234375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 36398548, + "step": 288 + }, + { + "epoch": 0.07413583018020907, + "grad_norm": 24.66343116760254, + "learning_rate": 5e-06, + "loss": 2.0048, + "num_input_tokens_seen": 36525080, + "step": 289 + }, + { + "epoch": 0.07413583018020907, + "loss": 2.065984010696411, + "loss_ce": 0.003483960870653391, + "loss_iou": 0.90234375, + "loss_num": 0.051513671875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 36525080, + "step": 289 + }, + { + "epoch": 0.07439235554415442, + "grad_norm": 82.30351257324219, + "learning_rate": 5e-06, + "loss": 1.9926, + "num_input_tokens_seen": 36650992, + "step": 290 + }, + { + "epoch": 0.07439235554415442, + "loss": 1.9284744262695312, + "loss_ce": 0.004646307323127985, + "loss_iou": 0.890625, + "loss_num": 0.0283203125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 36650992, + "step": 290 + }, + { + "epoch": 0.07464888090809979, + "grad_norm": 35.82350540161133, + "learning_rate": 5e-06, + "loss": 2.3368, + "num_input_tokens_seen": 36776252, + "step": 291 + }, + { + "epoch": 0.07464888090809979, + "loss": 2.3444504737854004, + "loss_ce": 0.0007005850784480572, + "loss_iou": 1.015625, + "loss_num": 0.0625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 36776252, + "step": 291 + }, + { + "epoch": 0.07490540627204514, + "grad_norm": 36.02092742919922, + "learning_rate": 5e-06, + "loss": 2.0524, + "num_input_tokens_seen": 36903024, + "step": 292 + }, + { + "epoch": 0.07490540627204514, + "loss": 2.1769344806671143, + "loss_ce": 0.0070125930942595005, + "loss_iou": 0.94140625, + "loss_num": 0.057373046875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 36903024, + "step": 292 + }, + { + "epoch": 0.07516193163599051, + "grad_norm": 37.4872932434082, + "learning_rate": 5e-06, + "loss": 1.9679, + "num_input_tokens_seen": 37030316, + "step": 293 + }, + { + "epoch": 0.07516193163599051, + "loss": 2.1317567825317383, + "loss_ce": 0.0008972855284810066, + "loss_iou": 0.96484375, + "loss_num": 0.03955078125, + "loss_xval": 2.125, + "num_input_tokens_seen": 37030316, + "step": 293 + }, + { + "epoch": 0.07541845699993587, + "grad_norm": 75.38507843017578, + "learning_rate": 5e-06, + "loss": 2.0778, + "num_input_tokens_seen": 37157364, + "step": 294 + }, + { + "epoch": 0.07541845699993587, + "loss": 1.9384303092956543, + "loss_ce": 0.003860055236145854, + "loss_iou": 0.8828125, + "loss_num": 0.033935546875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 37157364, + "step": 294 + }, + { + "epoch": 0.07567498236388123, + "grad_norm": 30.82659149169922, + "learning_rate": 5e-06, + "loss": 2.1344, + "num_input_tokens_seen": 37283780, + "step": 295 + }, + { + "epoch": 0.07567498236388123, + "loss": 2.34016752243042, + "loss_ce": 0.0022770087234675884, + "loss_iou": 1.03125, + "loss_num": 0.055908203125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 37283780, + "step": 295 + }, + { + "epoch": 0.07593150772782659, + "grad_norm": 41.268489837646484, + "learning_rate": 5e-06, + "loss": 2.1669, + "num_input_tokens_seen": 37410612, + "step": 296 + }, + { + "epoch": 0.07593150772782659, + "loss": 2.111407995223999, + "loss_ce": 0.0020328881219029427, + "loss_iou": 0.95703125, + "loss_num": 0.0390625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 37410612, + "step": 296 + }, + { + "epoch": 0.07618803309177195, + "grad_norm": 48.545631408691406, + "learning_rate": 5e-06, + "loss": 2.1216, + "num_input_tokens_seen": 37537244, + "step": 297 + }, + { + "epoch": 0.07618803309177195, + "loss": 2.1502416133880615, + "loss_ce": 0.0027806456200778484, + "loss_iou": 0.95703125, + "loss_num": 0.0458984375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 37537244, + "step": 297 + }, + { + "epoch": 0.0764445584557173, + "grad_norm": 35.040672302246094, + "learning_rate": 5e-06, + "loss": 1.8582, + "num_input_tokens_seen": 37663244, + "step": 298 + }, + { + "epoch": 0.0764445584557173, + "loss": 1.9157030582427979, + "loss_ce": 0.002617096295580268, + "loss_iou": 0.86328125, + "loss_num": 0.036865234375, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 37663244, + "step": 298 + }, + { + "epoch": 0.07670108381966267, + "grad_norm": 49.392520904541016, + "learning_rate": 5e-06, + "loss": 2.0651, + "num_input_tokens_seen": 37789464, + "step": 299 + }, + { + "epoch": 0.07670108381966267, + "loss": 2.158250331878662, + "loss_ce": 0.004929990973323584, + "loss_iou": 0.95703125, + "loss_num": 0.047607421875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 37789464, + "step": 299 + }, + { + "epoch": 0.07695760918360803, + "grad_norm": 96.1633071899414, + "learning_rate": 5e-06, + "loss": 2.2197, + "num_input_tokens_seen": 37914488, + "step": 300 + }, + { + "epoch": 0.07695760918360803, + "loss": 2.2829689979553223, + "loss_ce": 0.004648739937692881, + "loss_iou": 1.046875, + "loss_num": 0.036376953125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 37914488, + "step": 300 + }, + { + "epoch": 0.0772141345475534, + "grad_norm": 30.415678024291992, + "learning_rate": 5e-06, + "loss": 2.3991, + "num_input_tokens_seen": 38040540, + "step": 301 + }, + { + "epoch": 0.0772141345475534, + "loss": 2.2139339447021484, + "loss_ce": 0.002019990235567093, + "loss_iou": 0.9453125, + "loss_num": 0.064453125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 38040540, + "step": 301 + }, + { + "epoch": 0.07747065991149875, + "grad_norm": 30.147260665893555, + "learning_rate": 5e-06, + "loss": 2.1637, + "num_input_tokens_seen": 38167144, + "step": 302 + }, + { + "epoch": 0.07747065991149875, + "loss": 2.132211685180664, + "loss_ce": 0.004282068461179733, + "loss_iou": 0.9140625, + "loss_num": 0.060302734375, + "loss_xval": 2.125, + "num_input_tokens_seen": 38167144, + "step": 302 + }, + { + "epoch": 0.07772718527544412, + "grad_norm": 25.657854080200195, + "learning_rate": 5e-06, + "loss": 2.1041, + "num_input_tokens_seen": 38293096, + "step": 303 + }, + { + "epoch": 0.07772718527544412, + "loss": 2.2727856636047363, + "loss_ce": 0.004231109283864498, + "loss_iou": 0.96875, + "loss_num": 0.0654296875, + "loss_xval": 2.265625, + "num_input_tokens_seen": 38293096, + "step": 303 + }, + { + "epoch": 0.07798371063938947, + "grad_norm": 27.7092227935791, + "learning_rate": 5e-06, + "loss": 1.92, + "num_input_tokens_seen": 38419496, + "step": 304 + }, + { + "epoch": 0.07798371063938947, + "loss": 1.987078309059143, + "loss_ce": 0.0017268848605453968, + "loss_iou": 0.8828125, + "loss_num": 0.044189453125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 38419496, + "step": 304 + }, + { + "epoch": 0.07824023600333482, + "grad_norm": 68.24254608154297, + "learning_rate": 5e-06, + "loss": 2.2109, + "num_input_tokens_seen": 38545588, + "step": 305 + }, + { + "epoch": 0.07824023600333482, + "loss": 2.22039532661438, + "loss_ce": 0.005551714450120926, + "loss_iou": 0.99609375, + "loss_num": 0.044921875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 38545588, + "step": 305 + }, + { + "epoch": 0.07849676136728019, + "grad_norm": 39.15468215942383, + "learning_rate": 5e-06, + "loss": 2.2047, + "num_input_tokens_seen": 38671516, + "step": 306 + }, + { + "epoch": 0.07849676136728019, + "loss": 2.1540074348449707, + "loss_ce": 0.0016635048668831587, + "loss_iou": 0.94921875, + "loss_num": 0.05078125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 38671516, + "step": 306 + }, + { + "epoch": 0.07875328673122554, + "grad_norm": 29.422529220581055, + "learning_rate": 5e-06, + "loss": 1.9422, + "num_input_tokens_seen": 38797624, + "step": 307 + }, + { + "epoch": 0.07875328673122554, + "loss": 2.0709614753723145, + "loss_ce": 0.006508187390863895, + "loss_iou": 0.91015625, + "loss_num": 0.047607421875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 38797624, + "step": 307 + }, + { + "epoch": 0.07900981209517091, + "grad_norm": 47.252044677734375, + "learning_rate": 5e-06, + "loss": 1.8794, + "num_input_tokens_seen": 38924004, + "step": 308 + }, + { + "epoch": 0.07900981209517091, + "loss": 2.0390894412994385, + "loss_ce": 0.0010035168379545212, + "loss_iou": 0.91015625, + "loss_num": 0.043212890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 38924004, + "step": 308 + }, + { + "epoch": 0.07926633745911627, + "grad_norm": 48.2392578125, + "learning_rate": 5e-06, + "loss": 2.1417, + "num_input_tokens_seen": 39050300, + "step": 309 + }, + { + "epoch": 0.07926633745911627, + "loss": 2.0224266052246094, + "loss_ce": 0.0038719885051250458, + "loss_iou": 0.90234375, + "loss_num": 0.04296875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 39050300, + "step": 309 + }, + { + "epoch": 0.07952286282306163, + "grad_norm": 68.7890625, + "learning_rate": 5e-06, + "loss": 1.8536, + "num_input_tokens_seen": 39176956, + "step": 310 + }, + { + "epoch": 0.07952286282306163, + "loss": 1.9431216716766357, + "loss_ce": 0.003668619552627206, + "loss_iou": 0.890625, + "loss_num": 0.03076171875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 39176956, + "step": 310 + }, + { + "epoch": 0.07977938818700699, + "grad_norm": 38.89787292480469, + "learning_rate": 5e-06, + "loss": 2.2376, + "num_input_tokens_seen": 39302552, + "step": 311 + }, + { + "epoch": 0.07977938818700699, + "loss": 2.3054964542388916, + "loss_ce": 0.002762057352811098, + "loss_iou": 0.9921875, + "loss_num": 0.06298828125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 39302552, + "step": 311 + }, + { + "epoch": 0.08003591355095235, + "grad_norm": 30.763851165771484, + "learning_rate": 5e-06, + "loss": 2.0128, + "num_input_tokens_seen": 39429016, + "step": 312 + }, + { + "epoch": 0.08003591355095235, + "loss": 2.0272104740142822, + "loss_ce": 0.004749531392008066, + "loss_iou": 0.890625, + "loss_num": 0.04736328125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 39429016, + "step": 312 + }, + { + "epoch": 0.0802924389148977, + "grad_norm": 64.25830841064453, + "learning_rate": 5e-06, + "loss": 2.0167, + "num_input_tokens_seen": 39555464, + "step": 313 + }, + { + "epoch": 0.0802924389148977, + "loss": 1.9554970264434814, + "loss_ce": 0.005301830358803272, + "loss_iou": 0.89453125, + "loss_num": 0.031494140625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 39555464, + "step": 313 + }, + { + "epoch": 0.08054896427884307, + "grad_norm": 48.00767517089844, + "learning_rate": 5e-06, + "loss": 2.1942, + "num_input_tokens_seen": 39682084, + "step": 314 + }, + { + "epoch": 0.08054896427884307, + "loss": 2.223456859588623, + "loss_ce": 0.0008005423005670309, + "loss_iou": 0.96484375, + "loss_num": 0.05859375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 39682084, + "step": 314 + }, + { + "epoch": 0.08080548964278843, + "grad_norm": 51.093257904052734, + "learning_rate": 5e-06, + "loss": 2.1112, + "num_input_tokens_seen": 39808596, + "step": 315 + }, + { + "epoch": 0.08080548964278843, + "loss": 2.1587576866149902, + "loss_ce": 0.0044608633033931255, + "loss_iou": 0.9453125, + "loss_num": 0.052490234375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 39808596, + "step": 315 + }, + { + "epoch": 0.0810620150067338, + "grad_norm": 42.96095657348633, + "learning_rate": 5e-06, + "loss": 2.0367, + "num_input_tokens_seen": 39933928, + "step": 316 + }, + { + "epoch": 0.0810620150067338, + "loss": 2.033493995666504, + "loss_ce": 0.0041971355676651, + "loss_iou": 0.90625, + "loss_num": 0.0439453125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 39933928, + "step": 316 + }, + { + "epoch": 0.08131854037067915, + "grad_norm": 26.521760940551758, + "learning_rate": 5e-06, + "loss": 1.8722, + "num_input_tokens_seen": 40060972, + "step": 317 + }, + { + "epoch": 0.08131854037067915, + "loss": 1.9469468593597412, + "loss_ce": 0.007493725512176752, + "loss_iou": 0.86328125, + "loss_num": 0.0419921875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 40060972, + "step": 317 + }, + { + "epoch": 0.08157506573462452, + "grad_norm": 80.15369415283203, + "learning_rate": 5e-06, + "loss": 1.8141, + "num_input_tokens_seen": 40188076, + "step": 318 + }, + { + "epoch": 0.08157506573462452, + "loss": 1.7371220588684082, + "loss_ce": 0.002747059566900134, + "loss_iou": 0.80859375, + "loss_num": 0.0242919921875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 40188076, + "step": 318 + }, + { + "epoch": 0.08183159109856987, + "grad_norm": 39.99967956542969, + "learning_rate": 5e-06, + "loss": 2.239, + "num_input_tokens_seen": 40314776, + "step": 319 + }, + { + "epoch": 0.08183159109856987, + "loss": 2.1933367252349854, + "loss_ce": 0.011696097441017628, + "loss_iou": 0.94921875, + "loss_num": 0.056396484375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 40314776, + "step": 319 + }, + { + "epoch": 0.08208811646251524, + "grad_norm": 35.31010437011719, + "learning_rate": 5e-06, + "loss": 2.0932, + "num_input_tokens_seen": 40442032, + "step": 320 + }, + { + "epoch": 0.08208811646251524, + "loss": 2.0577545166015625, + "loss_ce": 0.002090286463499069, + "loss_iou": 0.90234375, + "loss_num": 0.050537109375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 40442032, + "step": 320 + }, + { + "epoch": 0.08234464182646059, + "grad_norm": 49.73766326904297, + "learning_rate": 5e-06, + "loss": 2.0163, + "num_input_tokens_seen": 40568464, + "step": 321 + }, + { + "epoch": 0.08234464182646059, + "loss": 1.8720080852508545, + "loss_ce": 0.002867449074983597, + "loss_iou": 0.859375, + "loss_num": 0.029541015625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 40568464, + "step": 321 + }, + { + "epoch": 0.08260116719040596, + "grad_norm": 34.36306381225586, + "learning_rate": 5e-06, + "loss": 2.0574, + "num_input_tokens_seen": 40694580, + "step": 322 + }, + { + "epoch": 0.08260116719040596, + "loss": 2.0855612754821777, + "loss_ce": 0.0025534306187182665, + "loss_iou": 0.921875, + "loss_num": 0.048095703125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 40694580, + "step": 322 + }, + { + "epoch": 0.08285769255435131, + "grad_norm": 71.71194458007812, + "learning_rate": 5e-06, + "loss": 1.8492, + "num_input_tokens_seen": 40820932, + "step": 323 + }, + { + "epoch": 0.08285769255435131, + "loss": 1.9663660526275635, + "loss_ce": 0.00249873218126595, + "loss_iou": 0.8984375, + "loss_num": 0.03271484375, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 40820932, + "step": 323 + }, + { + "epoch": 0.08311421791829667, + "grad_norm": 31.01168441772461, + "learning_rate": 5e-06, + "loss": 2.0396, + "num_input_tokens_seen": 40947164, + "step": 324 + }, + { + "epoch": 0.08311421791829667, + "loss": 1.9267469644546509, + "loss_ce": 0.001942227827385068, + "loss_iou": 0.8671875, + "loss_num": 0.0390625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 40947164, + "step": 324 + }, + { + "epoch": 0.08337074328224203, + "grad_norm": 52.59309005737305, + "learning_rate": 5e-06, + "loss": 1.9765, + "num_input_tokens_seen": 41073864, + "step": 325 + }, + { + "epoch": 0.08337074328224203, + "loss": 1.9303945302963257, + "loss_ce": 0.008519560098648071, + "loss_iou": 0.86328125, + "loss_num": 0.03955078125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 41073864, + "step": 325 + }, + { + "epoch": 0.08362726864618739, + "grad_norm": 35.378395080566406, + "learning_rate": 5e-06, + "loss": 2.1639, + "num_input_tokens_seen": 41200792, + "step": 326 + }, + { + "epoch": 0.08362726864618739, + "loss": 2.069206953048706, + "loss_ce": 0.006706961430609226, + "loss_iou": 0.91796875, + "loss_num": 0.04541015625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 41200792, + "step": 326 + }, + { + "epoch": 0.08388379401013275, + "grad_norm": 30.763845443725586, + "learning_rate": 5e-06, + "loss": 1.838, + "num_input_tokens_seen": 41326336, + "step": 327 + }, + { + "epoch": 0.08388379401013275, + "loss": 1.6947425603866577, + "loss_ce": 0.006265943869948387, + "loss_iou": 0.765625, + "loss_num": 0.0311279296875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 41326336, + "step": 327 + }, + { + "epoch": 0.0841403193740781, + "grad_norm": 73.30945587158203, + "learning_rate": 5e-06, + "loss": 1.9105, + "num_input_tokens_seen": 41451948, + "step": 328 + }, + { + "epoch": 0.0841403193740781, + "loss": 1.769942045211792, + "loss_ce": 0.0023639630526304245, + "loss_iou": 0.80859375, + "loss_num": 0.029541015625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 41451948, + "step": 328 + }, + { + "epoch": 0.08439684473802347, + "grad_norm": 39.43877029418945, + "learning_rate": 5e-06, + "loss": 2.3367, + "num_input_tokens_seen": 41578872, + "step": 329 + }, + { + "epoch": 0.08439684473802347, + "loss": 2.3628485202789307, + "loss_ce": 0.001520465943031013, + "loss_iou": 1.0234375, + "loss_num": 0.0625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 41578872, + "step": 329 + }, + { + "epoch": 0.08465337010196883, + "grad_norm": 28.91825294494629, + "learning_rate": 5e-06, + "loss": 2.0443, + "num_input_tokens_seen": 41705708, + "step": 330 + }, + { + "epoch": 0.08465337010196883, + "loss": 2.009075403213501, + "loss_ce": 0.0022394044790416956, + "loss_iou": 0.875, + "loss_num": 0.052001953125, + "loss_xval": 2.0, + "num_input_tokens_seen": 41705708, + "step": 330 + }, + { + "epoch": 0.0849098954659142, + "grad_norm": 74.33367919921875, + "learning_rate": 5e-06, + "loss": 1.9746, + "num_input_tokens_seen": 41830948, + "step": 331 + }, + { + "epoch": 0.0849098954659142, + "loss": 1.8742315769195557, + "loss_ce": 0.0021611948031932116, + "loss_iou": 0.84375, + "loss_num": 0.037109375, + "loss_xval": 1.875, + "num_input_tokens_seen": 41830948, + "step": 331 + }, + { + "epoch": 0.08516642082985955, + "grad_norm": 38.942474365234375, + "learning_rate": 5e-06, + "loss": 2.0466, + "num_input_tokens_seen": 41957564, + "step": 332 + }, + { + "epoch": 0.08516642082985955, + "loss": 2.1700406074523926, + "loss_ce": 0.0020719519816339016, + "loss_iou": 0.9609375, + "loss_num": 0.050537109375, + "loss_xval": 2.171875, + "num_input_tokens_seen": 41957564, + "step": 332 + }, + { + "epoch": 0.08542294619380492, + "grad_norm": 35.53105926513672, + "learning_rate": 5e-06, + "loss": 1.9198, + "num_input_tokens_seen": 42082748, + "step": 333 + }, + { + "epoch": 0.08542294619380492, + "loss": 1.8956103324890137, + "loss_ce": 0.004985298030078411, + "loss_iou": 0.859375, + "loss_num": 0.035400390625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 42082748, + "step": 333 + }, + { + "epoch": 0.08567947155775027, + "grad_norm": 62.77531814575195, + "learning_rate": 5e-06, + "loss": 1.9514, + "num_input_tokens_seen": 42209312, + "step": 334 + }, + { + "epoch": 0.08567947155775027, + "loss": 1.9161392450332642, + "loss_ce": 0.0020767301321029663, + "loss_iou": 0.875, + "loss_num": 0.031982421875, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 42209312, + "step": 334 + }, + { + "epoch": 0.08593599692169564, + "grad_norm": 45.00438690185547, + "learning_rate": 5e-06, + "loss": 2.1255, + "num_input_tokens_seen": 42335452, + "step": 335 + }, + { + "epoch": 0.08593599692169564, + "loss": 2.1374192237854004, + "loss_ce": 0.0026534399949014187, + "loss_iou": 0.94921875, + "loss_num": 0.046875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 42335452, + "step": 335 + }, + { + "epoch": 0.08619252228564099, + "grad_norm": 39.92178726196289, + "learning_rate": 5e-06, + "loss": 1.9765, + "num_input_tokens_seen": 42462052, + "step": 336 + }, + { + "epoch": 0.08619252228564099, + "loss": 2.1666581630706787, + "loss_ce": 0.004548938944935799, + "loss_iou": 0.9609375, + "loss_num": 0.048095703125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 42462052, + "step": 336 + }, + { + "epoch": 0.08644904764958636, + "grad_norm": 67.73116302490234, + "learning_rate": 5e-06, + "loss": 2.0433, + "num_input_tokens_seen": 42588228, + "step": 337 + }, + { + "epoch": 0.08644904764958636, + "loss": 2.058134078979492, + "loss_ce": 0.0014934733044356108, + "loss_iou": 0.94921875, + "loss_num": 0.03173828125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 42588228, + "step": 337 + }, + { + "epoch": 0.08670557301353171, + "grad_norm": 32.76817321777344, + "learning_rate": 5e-06, + "loss": 2.4295, + "num_input_tokens_seen": 42713744, + "step": 338 + }, + { + "epoch": 0.08670557301353171, + "loss": 2.3363304138183594, + "loss_ce": 0.002345960820093751, + "loss_iou": 1.015625, + "loss_num": 0.0595703125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 42713744, + "step": 338 + }, + { + "epoch": 0.08696209837747708, + "grad_norm": 31.98774528503418, + "learning_rate": 5e-06, + "loss": 2.1127, + "num_input_tokens_seen": 42840752, + "step": 339 + }, + { + "epoch": 0.08696209837747708, + "loss": 2.0008487701416016, + "loss_ce": 0.0008488236926496029, + "loss_iou": 0.89453125, + "loss_num": 0.042724609375, + "loss_xval": 2.0, + "num_input_tokens_seen": 42840752, + "step": 339 + }, + { + "epoch": 0.08721862374142243, + "grad_norm": 43.33399200439453, + "learning_rate": 5e-06, + "loss": 1.8944, + "num_input_tokens_seen": 42966564, + "step": 340 + }, + { + "epoch": 0.08721862374142243, + "loss": 1.8025044202804565, + "loss_ce": 0.003676345804706216, + "loss_iou": 0.81640625, + "loss_num": 0.03369140625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 42966564, + "step": 340 + }, + { + "epoch": 0.0874751491053678, + "grad_norm": 70.97069549560547, + "learning_rate": 5e-06, + "loss": 1.8752, + "num_input_tokens_seen": 43092624, + "step": 341 + }, + { + "epoch": 0.0874751491053678, + "loss": 1.6924054622650146, + "loss_ce": 0.0009993019048124552, + "loss_iou": 0.7890625, + "loss_num": 0.0235595703125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 43092624, + "step": 341 + }, + { + "epoch": 0.08773167446931315, + "grad_norm": 46.820491790771484, + "learning_rate": 5e-06, + "loss": 2.1108, + "num_input_tokens_seen": 43219300, + "step": 342 + }, + { + "epoch": 0.08773167446931315, + "loss": 2.0372846126556396, + "loss_ce": 0.0011517130769789219, + "loss_iou": 0.90625, + "loss_num": 0.04443359375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 43219300, + "step": 342 + }, + { + "epoch": 0.0879881998332585, + "grad_norm": 30.59954833984375, + "learning_rate": 5e-06, + "loss": 1.8693, + "num_input_tokens_seen": 43345552, + "step": 343 + }, + { + "epoch": 0.0879881998332585, + "loss": 1.8340370655059814, + "loss_ce": 0.0010292291408404708, + "loss_iou": 0.8125, + "loss_num": 0.04150390625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 43345552, + "step": 343 + }, + { + "epoch": 0.08824472519720387, + "grad_norm": 65.5096206665039, + "learning_rate": 5e-06, + "loss": 1.9539, + "num_input_tokens_seen": 43471912, + "step": 344 + }, + { + "epoch": 0.08824472519720387, + "loss": 1.952235221862793, + "loss_ce": 0.0010633106576278806, + "loss_iou": 0.890625, + "loss_num": 0.034912109375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 43471912, + "step": 344 + }, + { + "epoch": 0.08850125056114923, + "grad_norm": 33.71207046508789, + "learning_rate": 5e-06, + "loss": 2.0613, + "num_input_tokens_seen": 43595624, + "step": 345 + }, + { + "epoch": 0.08850125056114923, + "loss": 2.069028854370117, + "loss_ce": 0.002622646978124976, + "loss_iou": 0.90625, + "loss_num": 0.05126953125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 43595624, + "step": 345 + }, + { + "epoch": 0.0887577759250946, + "grad_norm": 36.453407287597656, + "learning_rate": 5e-06, + "loss": 1.9835, + "num_input_tokens_seen": 43722700, + "step": 346 + }, + { + "epoch": 0.0887577759250946, + "loss": 2.160922050476074, + "loss_ce": 0.004672117996960878, + "loss_iou": 0.9453125, + "loss_num": 0.052734375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 43722700, + "step": 346 + }, + { + "epoch": 0.08901430128903995, + "grad_norm": 73.64696502685547, + "learning_rate": 5e-06, + "loss": 1.8329, + "num_input_tokens_seen": 43849576, + "step": 347 + }, + { + "epoch": 0.08901430128903995, + "loss": 1.9572696685791016, + "loss_ce": 0.0021915710531175137, + "loss_iou": 0.890625, + "loss_num": 0.035400390625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 43849576, + "step": 347 + }, + { + "epoch": 0.08927082665298532, + "grad_norm": 41.60002517700195, + "learning_rate": 5e-06, + "loss": 2.2567, + "num_input_tokens_seen": 43975872, + "step": 348 + }, + { + "epoch": 0.08927082665298532, + "loss": 2.241227865219116, + "loss_ce": 0.002946640830487013, + "loss_iou": 0.98046875, + "loss_num": 0.055419921875, + "loss_xval": 2.234375, + "num_input_tokens_seen": 43975872, + "step": 348 + }, + { + "epoch": 0.08952735201693067, + "grad_norm": 33.033058166503906, + "learning_rate": 5e-06, + "loss": 2.091, + "num_input_tokens_seen": 44103356, + "step": 349 + }, + { + "epoch": 0.08952735201693067, + "loss": 2.138526201248169, + "loss_ce": 0.005713779479265213, + "loss_iou": 0.9375, + "loss_num": 0.05126953125, + "loss_xval": 2.125, + "num_input_tokens_seen": 44103356, + "step": 349 + }, + { + "epoch": 0.08978387738087604, + "grad_norm": 92.88117980957031, + "learning_rate": 5e-06, + "loss": 1.8291, + "num_input_tokens_seen": 44231064, + "step": 350 + }, + { + "epoch": 0.08978387738087604, + "loss": 1.9968928098678589, + "loss_ce": 0.00372876413166523, + "loss_iou": 0.8984375, + "loss_num": 0.0390625, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 44231064, + "step": 350 + }, + { + "epoch": 0.09004040274482139, + "grad_norm": 32.745452880859375, + "learning_rate": 5e-06, + "loss": 2.0204, + "num_input_tokens_seen": 44356472, + "step": 351 + }, + { + "epoch": 0.09004040274482139, + "loss": 1.8313908576965332, + "loss_ce": 0.0013127943966537714, + "loss_iou": 0.83984375, + "loss_num": 0.0306396484375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 44356472, + "step": 351 + }, + { + "epoch": 0.09029692810876676, + "grad_norm": 45.47397232055664, + "learning_rate": 5e-06, + "loss": 2.0665, + "num_input_tokens_seen": 44481532, + "step": 352 + }, + { + "epoch": 0.09029692810876676, + "loss": 1.9851510524749756, + "loss_ce": 0.003705753944814205, + "loss_iou": 0.90625, + "loss_num": 0.03369140625, + "loss_xval": 1.984375, + "num_input_tokens_seen": 44481532, + "step": 352 + }, + { + "epoch": 0.09055345347271211, + "grad_norm": 39.66913986206055, + "learning_rate": 5e-06, + "loss": 2.0952, + "num_input_tokens_seen": 44608676, + "step": 353 + }, + { + "epoch": 0.09055345347271211, + "loss": 2.054137706756592, + "loss_ce": 0.0014034186024218798, + "loss_iou": 0.92578125, + "loss_num": 0.0400390625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 44608676, + "step": 353 + }, + { + "epoch": 0.09080997883665748, + "grad_norm": 31.24917984008789, + "learning_rate": 5e-06, + "loss": 1.8657, + "num_input_tokens_seen": 44734648, + "step": 354 + }, + { + "epoch": 0.09080997883665748, + "loss": 1.877683401107788, + "loss_ce": 0.0017068713204935193, + "loss_iou": 0.83203125, + "loss_num": 0.04248046875, + "loss_xval": 1.875, + "num_input_tokens_seen": 44734648, + "step": 354 + }, + { + "epoch": 0.09106650420060283, + "grad_norm": 58.90100860595703, + "learning_rate": 5e-06, + "loss": 1.9046, + "num_input_tokens_seen": 44861268, + "step": 355 + }, + { + "epoch": 0.09106650420060283, + "loss": 1.8268811702728271, + "loss_ce": 0.00363890896551311, + "loss_iou": 0.83984375, + "loss_num": 0.0281982421875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 44861268, + "step": 355 + }, + { + "epoch": 0.0913230295645482, + "grad_norm": 46.838199615478516, + "learning_rate": 5e-06, + "loss": 2.1363, + "num_input_tokens_seen": 44986912, + "step": 356 + }, + { + "epoch": 0.0913230295645482, + "loss": 2.0906214714050293, + "loss_ce": 0.0017541771521791816, + "loss_iou": 0.94140625, + "loss_num": 0.041015625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 44986912, + "step": 356 + }, + { + "epoch": 0.09157955492849355, + "grad_norm": 32.001922607421875, + "learning_rate": 5e-06, + "loss": 1.7258, + "num_input_tokens_seen": 45114500, + "step": 357 + }, + { + "epoch": 0.09157955492849355, + "loss": 1.6585767269134521, + "loss_ce": 0.0033032733481377363, + "loss_iou": 0.75, + "loss_num": 0.03173828125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 45114500, + "step": 357 + }, + { + "epoch": 0.09183608029243892, + "grad_norm": 54.96855926513672, + "learning_rate": 5e-06, + "loss": 1.7968, + "num_input_tokens_seen": 45241052, + "step": 358 + }, + { + "epoch": 0.09183608029243892, + "loss": 1.9780910015106201, + "loss_ce": 0.0025050523690879345, + "loss_iou": 0.8828125, + "loss_num": 0.0419921875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 45241052, + "step": 358 + }, + { + "epoch": 0.09209260565638427, + "grad_norm": 42.633201599121094, + "learning_rate": 5e-06, + "loss": 1.9386, + "num_input_tokens_seen": 45367884, + "step": 359 + }, + { + "epoch": 0.09209260565638427, + "loss": 2.0292630195617676, + "loss_ce": 0.0019192514009773731, + "loss_iou": 0.9140625, + "loss_num": 0.040283203125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 45367884, + "step": 359 + }, + { + "epoch": 0.09234913102032964, + "grad_norm": 37.04764938354492, + "learning_rate": 5e-06, + "loss": 1.6593, + "num_input_tokens_seen": 45494572, + "step": 360 + }, + { + "epoch": 0.09234913102032964, + "loss": 1.745734691619873, + "loss_ce": 0.0006176084280014038, + "loss_iou": 0.796875, + "loss_num": 0.030517578125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 45494572, + "step": 360 + }, + { + "epoch": 0.092605656384275, + "grad_norm": 54.436317443847656, + "learning_rate": 5e-06, + "loss": 1.91, + "num_input_tokens_seen": 45621684, + "step": 361 + }, + { + "epoch": 0.092605656384275, + "loss": 1.8512158393859863, + "loss_ce": 0.0006300181848928332, + "loss_iou": 0.83984375, + "loss_num": 0.03466796875, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 45621684, + "step": 361 + }, + { + "epoch": 0.09286218174822035, + "grad_norm": 92.71322631835938, + "learning_rate": 5e-06, + "loss": 2.0366, + "num_input_tokens_seen": 45748460, + "step": 362 + }, + { + "epoch": 0.09286218174822035, + "loss": 1.9969830513000488, + "loss_ce": 0.0008892226032912731, + "loss_iou": 0.93359375, + "loss_num": 0.0264892578125, + "loss_xval": 2.0, + "num_input_tokens_seen": 45748460, + "step": 362 + }, + { + "epoch": 0.09311870711216572, + "grad_norm": 30.931638717651367, + "learning_rate": 5e-06, + "loss": 2.1905, + "num_input_tokens_seen": 45874904, + "step": 363 + }, + { + "epoch": 0.09311870711216572, + "loss": 2.270930767059326, + "loss_ce": 0.005305903032422066, + "loss_iou": 0.97265625, + "loss_num": 0.06396484375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 45874904, + "step": 363 + }, + { + "epoch": 0.09337523247611107, + "grad_norm": 33.67181396484375, + "learning_rate": 5e-06, + "loss": 1.9985, + "num_input_tokens_seen": 46001452, + "step": 364 + }, + { + "epoch": 0.09337523247611107, + "loss": 1.8927936553955078, + "loss_ce": 0.0031452039256691933, + "loss_iou": 0.84375, + "loss_num": 0.041015625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 46001452, + "step": 364 + }, + { + "epoch": 0.09363175784005644, + "grad_norm": 29.497053146362305, + "learning_rate": 5e-06, + "loss": 1.8975, + "num_input_tokens_seen": 46128216, + "step": 365 + }, + { + "epoch": 0.09363175784005644, + "loss": 1.8923070430755615, + "loss_ce": 0.004611681215465069, + "loss_iou": 0.84765625, + "loss_num": 0.0380859375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 46128216, + "step": 365 + }, + { + "epoch": 0.09388828320400179, + "grad_norm": 51.1405143737793, + "learning_rate": 5e-06, + "loss": 1.9559, + "num_input_tokens_seen": 46253788, + "step": 366 + }, + { + "epoch": 0.09388828320400179, + "loss": 2.061835765838623, + "loss_ce": 0.004218538291752338, + "loss_iou": 0.921875, + "loss_num": 0.04248046875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 46253788, + "step": 366 + }, + { + "epoch": 0.09414480856794716, + "grad_norm": 46.46742248535156, + "learning_rate": 5e-06, + "loss": 1.9501, + "num_input_tokens_seen": 46379996, + "step": 367 + }, + { + "epoch": 0.09414480856794716, + "loss": 2.1706390380859375, + "loss_ce": 0.006576630752533674, + "loss_iou": 0.9609375, + "loss_num": 0.04833984375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 46379996, + "step": 367 + }, + { + "epoch": 0.09440133393189251, + "grad_norm": 49.92007064819336, + "learning_rate": 5e-06, + "loss": 1.8015, + "num_input_tokens_seen": 46505744, + "step": 368 + }, + { + "epoch": 0.09440133393189251, + "loss": 1.6263514757156372, + "loss_ce": 0.0013514544116333127, + "loss_iou": 0.75390625, + "loss_num": 0.0228271484375, + "loss_xval": 1.625, + "num_input_tokens_seen": 46505744, + "step": 368 + }, + { + "epoch": 0.09465785929583788, + "grad_norm": 51.64584732055664, + "learning_rate": 5e-06, + "loss": 2.0452, + "num_input_tokens_seen": 46631548, + "step": 369 + }, + { + "epoch": 0.09465785929583788, + "loss": 2.0989174842834473, + "loss_ce": 0.005167662166059017, + "loss_iou": 0.9296875, + "loss_num": 0.046875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 46631548, + "step": 369 + }, + { + "epoch": 0.09491438465978323, + "grad_norm": 45.7161979675293, + "learning_rate": 5e-06, + "loss": 1.8576, + "num_input_tokens_seen": 46758004, + "step": 370 + }, + { + "epoch": 0.09491438465978323, + "loss": 2.003556966781616, + "loss_ce": 0.0035570072941482067, + "loss_iou": 0.90234375, + "loss_num": 0.039306640625, + "loss_xval": 2.0, + "num_input_tokens_seen": 46758004, + "step": 370 + }, + { + "epoch": 0.0951709100237286, + "grad_norm": 58.55605697631836, + "learning_rate": 5e-06, + "loss": 1.9176, + "num_input_tokens_seen": 46885204, + "step": 371 + }, + { + "epoch": 0.0951709100237286, + "loss": 2.1191670894622803, + "loss_ce": 0.004909203387796879, + "loss_iou": 0.94921875, + "loss_num": 0.043701171875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 46885204, + "step": 371 + }, + { + "epoch": 0.09542743538767395, + "grad_norm": 111.11799621582031, + "learning_rate": 5e-06, + "loss": 1.875, + "num_input_tokens_seen": 47012672, + "step": 372 + }, + { + "epoch": 0.09542743538767395, + "loss": 1.8831158876419067, + "loss_ce": 0.004209638107568026, + "loss_iou": 0.87109375, + "loss_num": 0.02783203125, + "loss_xval": 1.875, + "num_input_tokens_seen": 47012672, + "step": 372 + }, + { + "epoch": 0.09568396075161932, + "grad_norm": 41.17919158935547, + "learning_rate": 5e-06, + "loss": 2.1693, + "num_input_tokens_seen": 47138180, + "step": 373 + }, + { + "epoch": 0.09568396075161932, + "loss": 2.2909669876098633, + "loss_ce": 0.007763751316815615, + "loss_iou": 1.0, + "loss_num": 0.0576171875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 47138180, + "step": 373 + }, + { + "epoch": 0.09594048611556467, + "grad_norm": 46.01017761230469, + "learning_rate": 5e-06, + "loss": 2.082, + "num_input_tokens_seen": 47264372, + "step": 374 + }, + { + "epoch": 0.09594048611556467, + "loss": 2.1428093910217285, + "loss_ce": 0.005114227067679167, + "loss_iou": 0.94921875, + "loss_num": 0.04833984375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 47264372, + "step": 374 + }, + { + "epoch": 0.09619701147951004, + "grad_norm": 41.89747619628906, + "learning_rate": 5e-06, + "loss": 2.0733, + "num_input_tokens_seen": 47390860, + "step": 375 + }, + { + "epoch": 0.09619701147951004, + "loss": 1.9032719135284424, + "loss_ce": 0.0009281990933232009, + "loss_iou": 0.8671875, + "loss_num": 0.033203125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 47390860, + "step": 375 + }, + { + "epoch": 0.0964535368434554, + "grad_norm": 26.977554321289062, + "learning_rate": 5e-06, + "loss": 2.0231, + "num_input_tokens_seen": 47517004, + "step": 376 + }, + { + "epoch": 0.0964535368434554, + "loss": 1.9311347007751465, + "loss_ce": 0.00047068134881556034, + "loss_iou": 0.87109375, + "loss_num": 0.0380859375, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 47517004, + "step": 376 + }, + { + "epoch": 0.09671006220740076, + "grad_norm": 27.046398162841797, + "learning_rate": 5e-06, + "loss": 1.7927, + "num_input_tokens_seen": 47643720, + "step": 377 + }, + { + "epoch": 0.09671006220740076, + "loss": 1.7474606037139893, + "loss_ce": 0.0013667582534253597, + "loss_iou": 0.8046875, + "loss_num": 0.026611328125, + "loss_xval": 1.75, + "num_input_tokens_seen": 47643720, + "step": 377 + }, + { + "epoch": 0.09696658757134612, + "grad_norm": 27.567779541015625, + "learning_rate": 5e-06, + "loss": 1.782, + "num_input_tokens_seen": 47770320, + "step": 378 + }, + { + "epoch": 0.09696658757134612, + "loss": 1.847099781036377, + "loss_ce": 0.010185705497860909, + "loss_iou": 0.8125, + "loss_num": 0.042236328125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 47770320, + "step": 378 + }, + { + "epoch": 0.09722311293529148, + "grad_norm": 42.8397331237793, + "learning_rate": 5e-06, + "loss": 1.7814, + "num_input_tokens_seen": 47897564, + "step": 379 + }, + { + "epoch": 0.09722311293529148, + "loss": 1.6979308128356934, + "loss_ce": 0.0026183421723544598, + "loss_iou": 0.78515625, + "loss_num": 0.025634765625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 47897564, + "step": 379 + }, + { + "epoch": 0.09747963829923684, + "grad_norm": 39.85624313354492, + "learning_rate": 5e-06, + "loss": 1.8764, + "num_input_tokens_seen": 48023776, + "step": 380 + }, + { + "epoch": 0.09747963829923684, + "loss": 1.981938123703003, + "loss_ce": 0.009281916543841362, + "loss_iou": 0.890625, + "loss_num": 0.03857421875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 48023776, + "step": 380 + }, + { + "epoch": 0.09773616366318219, + "grad_norm": 76.34963989257812, + "learning_rate": 5e-06, + "loss": 1.927, + "num_input_tokens_seen": 48150584, + "step": 381 + }, + { + "epoch": 0.09773616366318219, + "loss": 2.009948253631592, + "loss_ce": 0.005065613891929388, + "loss_iou": 0.921875, + "loss_num": 0.031982421875, + "loss_xval": 2.0, + "num_input_tokens_seen": 48150584, + "step": 381 + }, + { + "epoch": 0.09799268902712756, + "grad_norm": 37.40764617919922, + "learning_rate": 5e-06, + "loss": 2.2328, + "num_input_tokens_seen": 48276472, + "step": 382 + }, + { + "epoch": 0.09799268902712756, + "loss": 2.1764633655548096, + "loss_ce": 0.0006822074647061527, + "loss_iou": 0.96875, + "loss_num": 0.0478515625, + "loss_xval": 2.171875, + "num_input_tokens_seen": 48276472, + "step": 382 + }, + { + "epoch": 0.09824921439107291, + "grad_norm": 37.27070999145508, + "learning_rate": 5e-06, + "loss": 1.9252, + "num_input_tokens_seen": 48403056, + "step": 383 + }, + { + "epoch": 0.09824921439107291, + "loss": 1.8889168500900269, + "loss_ce": 0.0021980367600917816, + "loss_iou": 0.84765625, + "loss_num": 0.0380859375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 48403056, + "step": 383 + }, + { + "epoch": 0.09850573975501828, + "grad_norm": 27.008848190307617, + "learning_rate": 5e-06, + "loss": 1.8778, + "num_input_tokens_seen": 48528428, + "step": 384 + }, + { + "epoch": 0.09850573975501828, + "loss": 1.9574403762817383, + "loss_ce": 0.003338810056447983, + "loss_iou": 0.875, + "loss_num": 0.041259765625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 48528428, + "step": 384 + }, + { + "epoch": 0.09876226511896363, + "grad_norm": 34.96820068359375, + "learning_rate": 5e-06, + "loss": 1.7085, + "num_input_tokens_seen": 48653992, + "step": 385 + }, + { + "epoch": 0.09876226511896363, + "loss": 1.7924166917800903, + "loss_ce": 0.012143252417445183, + "loss_iou": 0.80078125, + "loss_num": 0.0361328125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 48653992, + "step": 385 + }, + { + "epoch": 0.099018790482909, + "grad_norm": 65.2629165649414, + "learning_rate": 5e-06, + "loss": 1.8238, + "num_input_tokens_seen": 48780472, + "step": 386 + }, + { + "epoch": 0.099018790482909, + "loss": 2.009025812149048, + "loss_ce": 0.003166389651596546, + "loss_iou": 0.91015625, + "loss_num": 0.037841796875, + "loss_xval": 2.0, + "num_input_tokens_seen": 48780472, + "step": 386 + }, + { + "epoch": 0.09927531584685435, + "grad_norm": 45.25067901611328, + "learning_rate": 5e-06, + "loss": 2.0578, + "num_input_tokens_seen": 48907968, + "step": 387 + }, + { + "epoch": 0.09927531584685435, + "loss": 2.186983108520508, + "loss_ce": 0.002412930829450488, + "loss_iou": 0.96875, + "loss_num": 0.04931640625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 48907968, + "step": 387 + }, + { + "epoch": 0.09953184121079972, + "grad_norm": 32.712642669677734, + "learning_rate": 5e-06, + "loss": 1.9374, + "num_input_tokens_seen": 49033404, + "step": 388 + }, + { + "epoch": 0.09953184121079972, + "loss": 2.0127270221710205, + "loss_ce": 0.005891070701181889, + "loss_iou": 0.890625, + "loss_num": 0.044921875, + "loss_xval": 2.0, + "num_input_tokens_seen": 49033404, + "step": 388 + }, + { + "epoch": 0.09978836657474507, + "grad_norm": 87.1080551147461, + "learning_rate": 5e-06, + "loss": 1.6803, + "num_input_tokens_seen": 49159752, + "step": 389 + }, + { + "epoch": 0.09978836657474507, + "loss": 1.70613694190979, + "loss_ce": 0.002035434590652585, + "loss_iou": 0.796875, + "loss_num": 0.0220947265625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 49159752, + "step": 389 + }, + { + "epoch": 0.10004489193869044, + "grad_norm": 39.38822937011719, + "learning_rate": 5e-06, + "loss": 2.0674, + "num_input_tokens_seen": 49285864, + "step": 390 + }, + { + "epoch": 0.10004489193869044, + "loss": 1.9568365812301636, + "loss_ce": 0.0007818570593371987, + "loss_iou": 0.8828125, + "loss_num": 0.038818359375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 49285864, + "step": 390 + }, + { + "epoch": 0.1003014173026358, + "grad_norm": 50.23189163208008, + "learning_rate": 5e-06, + "loss": 1.7963, + "num_input_tokens_seen": 49412656, + "step": 391 + }, + { + "epoch": 0.1003014173026358, + "loss": 1.7448450326919556, + "loss_ce": 0.0016809296794235706, + "loss_iou": 0.80078125, + "loss_num": 0.028564453125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 49412656, + "step": 391 + }, + { + "epoch": 0.10055794266658116, + "grad_norm": 45.05488204956055, + "learning_rate": 5e-06, + "loss": 1.8193, + "num_input_tokens_seen": 49538876, + "step": 392 + }, + { + "epoch": 0.10055794266658116, + "loss": 1.7290843725204468, + "loss_ce": 0.0025218932423740625, + "loss_iou": 0.78125, + "loss_num": 0.0322265625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 49538876, + "step": 392 + }, + { + "epoch": 0.10081446803052652, + "grad_norm": 45.206180572509766, + "learning_rate": 5e-06, + "loss": 1.8259, + "num_input_tokens_seen": 49665532, + "step": 393 + }, + { + "epoch": 0.10081446803052652, + "loss": 1.7593746185302734, + "loss_ce": 0.0025386556517332792, + "loss_iou": 0.796875, + "loss_num": 0.03173828125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 49665532, + "step": 393 + }, + { + "epoch": 0.10107099339447188, + "grad_norm": 44.7424201965332, + "learning_rate": 5e-06, + "loss": 1.8836, + "num_input_tokens_seen": 49792176, + "step": 394 + }, + { + "epoch": 0.10107099339447188, + "loss": 1.9938554763793945, + "loss_ce": 0.004597583785653114, + "loss_iou": 0.89453125, + "loss_num": 0.039794921875, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 49792176, + "step": 394 + }, + { + "epoch": 0.10132751875841724, + "grad_norm": 63.11256408691406, + "learning_rate": 5e-06, + "loss": 1.8382, + "num_input_tokens_seen": 49918192, + "step": 395 + }, + { + "epoch": 0.10132751875841724, + "loss": 1.7171096801757812, + "loss_ce": 0.0022659602109342813, + "loss_iou": 0.7890625, + "loss_num": 0.0269775390625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 49918192, + "step": 395 + }, + { + "epoch": 0.1015840441223626, + "grad_norm": 45.020912170410156, + "learning_rate": 5e-06, + "loss": 2.093, + "num_input_tokens_seen": 50044196, + "step": 396 + }, + { + "epoch": 0.1015840441223626, + "loss": 2.035141944885254, + "loss_ce": 0.0019388271030038595, + "loss_iou": 0.9140625, + "loss_num": 0.041015625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 50044196, + "step": 396 + }, + { + "epoch": 0.10184056948630796, + "grad_norm": 30.95770835876465, + "learning_rate": 5e-06, + "loss": 1.8656, + "num_input_tokens_seen": 50169376, + "step": 397 + }, + { + "epoch": 0.10184056948630796, + "loss": 1.7575057744979858, + "loss_ce": 0.0006698445649817586, + "loss_iou": 0.80078125, + "loss_num": 0.0303955078125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 50169376, + "step": 397 + }, + { + "epoch": 0.10209709485025333, + "grad_norm": 72.34020233154297, + "learning_rate": 5e-06, + "loss": 1.8197, + "num_input_tokens_seen": 50293692, + "step": 398 + }, + { + "epoch": 0.10209709485025333, + "loss": 1.7731654644012451, + "loss_ce": 0.0016812339890748262, + "loss_iou": 0.8203125, + "loss_num": 0.0267333984375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 50293692, + "step": 398 + }, + { + "epoch": 0.10235362021419868, + "grad_norm": 45.61842727661133, + "learning_rate": 5e-06, + "loss": 1.9679, + "num_input_tokens_seen": 50419636, + "step": 399 + }, + { + "epoch": 0.10235362021419868, + "loss": 2.0103721618652344, + "loss_ce": 0.0025595633778721094, + "loss_iou": 0.890625, + "loss_num": 0.044677734375, + "loss_xval": 2.0, + "num_input_tokens_seen": 50419636, + "step": 399 + }, + { + "epoch": 0.10261014557814403, + "grad_norm": 44.74113464355469, + "learning_rate": 5e-06, + "loss": 1.7642, + "num_input_tokens_seen": 50547072, + "step": 400 + }, + { + "epoch": 0.10261014557814403, + "loss": 1.5722901821136475, + "loss_ce": 0.0010011489503085613, + "loss_iou": 0.7265625, + "loss_num": 0.0240478515625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 50547072, + "step": 400 + }, + { + "epoch": 0.1028666709420894, + "grad_norm": 101.76426696777344, + "learning_rate": 5e-06, + "loss": 1.9802, + "num_input_tokens_seen": 50673264, + "step": 401 + }, + { + "epoch": 0.1028666709420894, + "loss": 2.0145387649536133, + "loss_ce": 0.0037967341486364603, + "loss_iou": 0.91796875, + "loss_num": 0.03466796875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 50673264, + "step": 401 + }, + { + "epoch": 0.10312319630603475, + "grad_norm": 30.73699188232422, + "learning_rate": 5e-06, + "loss": 2.1181, + "num_input_tokens_seen": 50800320, + "step": 402 + }, + { + "epoch": 0.10312319630603475, + "loss": 1.9271509647369385, + "loss_ce": 0.00234632333740592, + "loss_iou": 0.86328125, + "loss_num": 0.039794921875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 50800320, + "step": 402 + }, + { + "epoch": 0.10337972166998012, + "grad_norm": 91.9260482788086, + "learning_rate": 5e-06, + "loss": 1.8528, + "num_input_tokens_seen": 50926476, + "step": 403 + }, + { + "epoch": 0.10337972166998012, + "loss": 1.7707874774932861, + "loss_ce": 0.005162440240383148, + "loss_iou": 0.79296875, + "loss_num": 0.03564453125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 50926476, + "step": 403 + }, + { + "epoch": 0.10363624703392547, + "grad_norm": 38.76560974121094, + "learning_rate": 5e-06, + "loss": 1.8079, + "num_input_tokens_seen": 51054132, + "step": 404 + }, + { + "epoch": 0.10363624703392547, + "loss": 1.9710948467254639, + "loss_ce": 0.0023449005093425512, + "loss_iou": 0.8828125, + "loss_num": 0.041015625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 51054132, + "step": 404 + }, + { + "epoch": 0.10389277239787084, + "grad_norm": 36.57107925415039, + "learning_rate": 5e-06, + "loss": 1.714, + "num_input_tokens_seen": 51180276, + "step": 405 + }, + { + "epoch": 0.10389277239787084, + "loss": 1.557878851890564, + "loss_ce": 0.0012381927808746696, + "loss_iou": 0.71875, + "loss_num": 0.024169921875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 51180276, + "step": 405 + }, + { + "epoch": 0.1041492977618162, + "grad_norm": 67.13604736328125, + "learning_rate": 5e-06, + "loss": 1.8672, + "num_input_tokens_seen": 51305076, + "step": 406 + }, + { + "epoch": 0.1041492977618162, + "loss": 2.1625876426696777, + "loss_ce": 0.007314011454582214, + "loss_iou": 0.96484375, + "loss_num": 0.045166015625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 51305076, + "step": 406 + }, + { + "epoch": 0.10440582312576156, + "grad_norm": 46.04188919067383, + "learning_rate": 5e-06, + "loss": 1.9085, + "num_input_tokens_seen": 51431172, + "step": 407 + }, + { + "epoch": 0.10440582312576156, + "loss": 2.0135154724121094, + "loss_ce": 0.0008201323798857629, + "loss_iou": 0.90625, + "loss_num": 0.040283203125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 51431172, + "step": 407 + }, + { + "epoch": 0.10466234848970692, + "grad_norm": 36.4671630859375, + "learning_rate": 5e-06, + "loss": 1.8289, + "num_input_tokens_seen": 51557136, + "step": 408 + }, + { + "epoch": 0.10466234848970692, + "loss": 1.9439496994018555, + "loss_ce": 0.006449779495596886, + "loss_iou": 0.8671875, + "loss_num": 0.04052734375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 51557136, + "step": 408 + }, + { + "epoch": 0.10491887385365228, + "grad_norm": 60.42519760131836, + "learning_rate": 5e-06, + "loss": 1.7971, + "num_input_tokens_seen": 51682540, + "step": 409 + }, + { + "epoch": 0.10491887385365228, + "loss": 1.6277374029159546, + "loss_ce": 0.0007843549246899784, + "loss_iou": 0.75, + "loss_num": 0.02490234375, + "loss_xval": 1.625, + "num_input_tokens_seen": 51682540, + "step": 409 + }, + { + "epoch": 0.10517539921759764, + "grad_norm": 45.99513626098633, + "learning_rate": 5e-06, + "loss": 1.8143, + "num_input_tokens_seen": 51808536, + "step": 410 + }, + { + "epoch": 0.10517539921759764, + "loss": 1.5829787254333496, + "loss_ce": 0.0009474909165874124, + "loss_iou": 0.74609375, + "loss_num": 0.0185546875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 51808536, + "step": 410 + }, + { + "epoch": 0.105431924581543, + "grad_norm": 55.012760162353516, + "learning_rate": 5e-06, + "loss": 1.8428, + "num_input_tokens_seen": 51935036, + "step": 411 + }, + { + "epoch": 0.105431924581543, + "loss": 1.7962709665298462, + "loss_ce": 0.002325579058378935, + "loss_iou": 0.8125, + "loss_num": 0.033447265625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 51935036, + "step": 411 + }, + { + "epoch": 0.10568844994548836, + "grad_norm": 101.16960144042969, + "learning_rate": 5e-06, + "loss": 1.8949, + "num_input_tokens_seen": 52061100, + "step": 412 + }, + { + "epoch": 0.10568844994548836, + "loss": 1.9165418148040771, + "loss_ce": 0.0034558130428195, + "loss_iou": 0.87890625, + "loss_num": 0.03076171875, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 52061100, + "step": 412 + }, + { + "epoch": 0.10594497530943373, + "grad_norm": 39.75946807861328, + "learning_rate": 5e-06, + "loss": 2.041, + "num_input_tokens_seen": 52187212, + "step": 413 + }, + { + "epoch": 0.10594497530943373, + "loss": 2.13708758354187, + "loss_ce": 0.0032984924037009478, + "loss_iou": 0.9375, + "loss_num": 0.051025390625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 52187212, + "step": 413 + }, + { + "epoch": 0.10620150067337908, + "grad_norm": 36.745365142822266, + "learning_rate": 5e-06, + "loss": 1.8476, + "num_input_tokens_seen": 52314332, + "step": 414 + }, + { + "epoch": 0.10620150067337908, + "loss": 1.70456862449646, + "loss_ce": 0.0004671252390835434, + "loss_iou": 0.77734375, + "loss_num": 0.029541015625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 52314332, + "step": 414 + }, + { + "epoch": 0.10645802603732445, + "grad_norm": 41.343719482421875, + "learning_rate": 5e-06, + "loss": 1.6172, + "num_input_tokens_seen": 52439956, + "step": 415 + }, + { + "epoch": 0.10645802603732445, + "loss": 1.7731833457946777, + "loss_ce": 0.0016989423893392086, + "loss_iou": 0.81640625, + "loss_num": 0.0279541015625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 52439956, + "step": 415 + }, + { + "epoch": 0.1067145514012698, + "grad_norm": 52.96699142456055, + "learning_rate": 5e-06, + "loss": 1.7222, + "num_input_tokens_seen": 52567116, + "step": 416 + }, + { + "epoch": 0.1067145514012698, + "loss": 1.8557533025741577, + "loss_ce": 0.0012610559351742268, + "loss_iou": 0.8359375, + "loss_num": 0.03662109375, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 52567116, + "step": 416 + }, + { + "epoch": 0.10697107676521517, + "grad_norm": 32.44997024536133, + "learning_rate": 5e-06, + "loss": 1.8443, + "num_input_tokens_seen": 52691920, + "step": 417 + }, + { + "epoch": 0.10697107676521517, + "loss": 1.8001456260681152, + "loss_ce": 0.005223775748163462, + "loss_iou": 0.8046875, + "loss_num": 0.037109375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 52691920, + "step": 417 + }, + { + "epoch": 0.10722760212916052, + "grad_norm": 28.522615432739258, + "learning_rate": 5e-06, + "loss": 1.7361, + "num_input_tokens_seen": 52817552, + "step": 418 + }, + { + "epoch": 0.10722760212916052, + "loss": 1.5261805057525635, + "loss_ce": 0.0007898452458903193, + "loss_iou": 0.72265625, + "loss_num": 0.0166015625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 52817552, + "step": 418 + }, + { + "epoch": 0.10748412749310587, + "grad_norm": 56.645729064941406, + "learning_rate": 5e-06, + "loss": 1.8134, + "num_input_tokens_seen": 52943824, + "step": 419 + }, + { + "epoch": 0.10748412749310587, + "loss": 2.0826609134674072, + "loss_ce": 0.003559364937245846, + "loss_iou": 0.9375, + "loss_num": 0.040283203125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 52943824, + "step": 419 + }, + { + "epoch": 0.10774065285705124, + "grad_norm": 63.598140716552734, + "learning_rate": 5e-06, + "loss": 1.7619, + "num_input_tokens_seen": 53070772, + "step": 420 + }, + { + "epoch": 0.10774065285705124, + "loss": 1.6294734477996826, + "loss_ce": 0.001543697202578187, + "loss_iou": 0.7578125, + "loss_num": 0.0224609375, + "loss_xval": 1.625, + "num_input_tokens_seen": 53070772, + "step": 420 + }, + { + "epoch": 0.1079971782209966, + "grad_norm": 79.30410766601562, + "learning_rate": 5e-06, + "loss": 2.0085, + "num_input_tokens_seen": 53196960, + "step": 421 + }, + { + "epoch": 0.1079971782209966, + "loss": 2.0633955001831055, + "loss_ce": 0.0028485646471381187, + "loss_iou": 0.9296875, + "loss_num": 0.041015625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 53196960, + "step": 421 + }, + { + "epoch": 0.10825370358494196, + "grad_norm": 41.807945251464844, + "learning_rate": 5e-06, + "loss": 1.9425, + "num_input_tokens_seen": 53322828, + "step": 422 + }, + { + "epoch": 0.10825370358494196, + "loss": 2.0428686141967773, + "loss_ce": 0.0028295726515352726, + "loss_iou": 0.8984375, + "loss_num": 0.04833984375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 53322828, + "step": 422 + }, + { + "epoch": 0.10851022894888732, + "grad_norm": 38.706390380859375, + "learning_rate": 5e-06, + "loss": 1.7605, + "num_input_tokens_seen": 53448580, + "step": 423 + }, + { + "epoch": 0.10851022894888732, + "loss": 1.6660207509994507, + "loss_ce": 0.002934785559773445, + "loss_iou": 0.76171875, + "loss_num": 0.02734375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 53448580, + "step": 423 + }, + { + "epoch": 0.10876675431283268, + "grad_norm": 42.25354766845703, + "learning_rate": 5e-06, + "loss": 1.6892, + "num_input_tokens_seen": 53575176, + "step": 424 + }, + { + "epoch": 0.10876675431283268, + "loss": 1.6477659940719604, + "loss_ce": 0.001281648874282837, + "loss_iou": 0.74609375, + "loss_num": 0.03076171875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 53575176, + "step": 424 + }, + { + "epoch": 0.10902327967677804, + "grad_norm": 105.33165740966797, + "learning_rate": 5e-06, + "loss": 1.8146, + "num_input_tokens_seen": 53702324, + "step": 425 + }, + { + "epoch": 0.10902327967677804, + "loss": 1.7754220962524414, + "loss_ce": 0.002961251884698868, + "loss_iou": 0.80859375, + "loss_num": 0.0311279296875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 53702324, + "step": 425 + }, + { + "epoch": 0.1092798050407234, + "grad_norm": 46.32204818725586, + "learning_rate": 5e-06, + "loss": 2.0274, + "num_input_tokens_seen": 53828256, + "step": 426 + }, + { + "epoch": 0.1092798050407234, + "loss": 1.9903128147125244, + "loss_ce": 0.002031689276918769, + "loss_iou": 0.90625, + "loss_num": 0.035888671875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 53828256, + "step": 426 + }, + { + "epoch": 0.10953633040466876, + "grad_norm": 36.0590705871582, + "learning_rate": 5e-06, + "loss": 1.9017, + "num_input_tokens_seen": 53954976, + "step": 427 + }, + { + "epoch": 0.10953633040466876, + "loss": 1.9017456769943237, + "loss_ce": 0.0023316240403801203, + "loss_iou": 0.83984375, + "loss_num": 0.044677734375, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 53954976, + "step": 427 + }, + { + "epoch": 0.10979285576861413, + "grad_norm": 34.90947341918945, + "learning_rate": 5e-06, + "loss": 1.8217, + "num_input_tokens_seen": 54081696, + "step": 428 + }, + { + "epoch": 0.10979285576861413, + "loss": 1.701188087463379, + "loss_ce": 0.0009927484206855297, + "loss_iou": 0.765625, + "loss_num": 0.0341796875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 54081696, + "step": 428 + }, + { + "epoch": 0.11004938113255948, + "grad_norm": 35.385154724121094, + "learning_rate": 5e-06, + "loss": 1.8222, + "num_input_tokens_seen": 54207260, + "step": 429 + }, + { + "epoch": 0.11004938113255948, + "loss": 1.9147478342056274, + "loss_ce": 0.002638458739966154, + "loss_iou": 0.859375, + "loss_num": 0.03857421875, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 54207260, + "step": 429 + }, + { + "epoch": 0.11030590649650485, + "grad_norm": 68.71649169921875, + "learning_rate": 5e-06, + "loss": 1.7898, + "num_input_tokens_seen": 54333640, + "step": 430 + }, + { + "epoch": 0.11030590649650485, + "loss": 1.9323217868804932, + "loss_ce": 0.003610863583162427, + "loss_iou": 0.86328125, + "loss_num": 0.04150390625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 54333640, + "step": 430 + }, + { + "epoch": 0.1105624318604502, + "grad_norm": 34.09584426879883, + "learning_rate": 5e-06, + "loss": 1.956, + "num_input_tokens_seen": 54461124, + "step": 431 + }, + { + "epoch": 0.1105624318604502, + "loss": 1.970023512840271, + "loss_ce": 0.003226581495255232, + "loss_iou": 0.875, + "loss_num": 0.0439453125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 54461124, + "step": 431 + }, + { + "epoch": 0.11081895722439557, + "grad_norm": 58.23276138305664, + "learning_rate": 5e-06, + "loss": 1.8524, + "num_input_tokens_seen": 54587948, + "step": 432 + }, + { + "epoch": 0.11081895722439557, + "loss": 1.702561616897583, + "loss_ce": 0.0013897698372602463, + "loss_iou": 0.7734375, + "loss_num": 0.03125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 54587948, + "step": 432 + }, + { + "epoch": 0.11107548258834092, + "grad_norm": 35.15635299682617, + "learning_rate": 5e-06, + "loss": 1.8205, + "num_input_tokens_seen": 54715208, + "step": 433 + }, + { + "epoch": 0.11107548258834092, + "loss": 1.9890425205230713, + "loss_ce": 0.004667556844651699, + "loss_iou": 0.87890625, + "loss_num": 0.04541015625, + "loss_xval": 1.984375, + "num_input_tokens_seen": 54715208, + "step": 433 + }, + { + "epoch": 0.11133200795228629, + "grad_norm": 46.86457061767578, + "learning_rate": 5e-06, + "loss": 1.7029, + "num_input_tokens_seen": 54842340, + "step": 434 + }, + { + "epoch": 0.11133200795228629, + "loss": 1.7530335187911987, + "loss_ce": 0.0010804182384163141, + "loss_iou": 0.81640625, + "loss_num": 0.02294921875, + "loss_xval": 1.75, + "num_input_tokens_seen": 54842340, + "step": 434 + }, + { + "epoch": 0.11158853331623164, + "grad_norm": 51.49827575683594, + "learning_rate": 5e-06, + "loss": 1.7707, + "num_input_tokens_seen": 54969424, + "step": 435 + }, + { + "epoch": 0.11158853331623164, + "loss": 1.773402214050293, + "loss_ce": 0.002894323319196701, + "loss_iou": 0.8203125, + "loss_num": 0.026611328125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 54969424, + "step": 435 + }, + { + "epoch": 0.11184505868017701, + "grad_norm": 66.16261291503906, + "learning_rate": 5e-06, + "loss": 1.8966, + "num_input_tokens_seen": 55094188, + "step": 436 + }, + { + "epoch": 0.11184505868017701, + "loss": 1.8799519538879395, + "loss_ce": 0.005928606726229191, + "loss_iou": 0.87109375, + "loss_num": 0.0260009765625, + "loss_xval": 1.875, + "num_input_tokens_seen": 55094188, + "step": 436 + }, + { + "epoch": 0.11210158404412236, + "grad_norm": 41.551151275634766, + "learning_rate": 5e-06, + "loss": 1.9842, + "num_input_tokens_seen": 55219580, + "step": 437 + }, + { + "epoch": 0.11210158404412236, + "loss": 2.0122172832489014, + "loss_ce": 0.001475208904594183, + "loss_iou": 0.8984375, + "loss_num": 0.04248046875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 55219580, + "step": 437 + }, + { + "epoch": 0.11235810940806772, + "grad_norm": 55.89268493652344, + "learning_rate": 5e-06, + "loss": 1.8722, + "num_input_tokens_seen": 55346164, + "step": 438 + }, + { + "epoch": 0.11235810940806772, + "loss": 1.8634271621704102, + "loss_ce": 0.004052193835377693, + "loss_iou": 0.8359375, + "loss_num": 0.037841796875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 55346164, + "step": 438 + }, + { + "epoch": 0.11261463477201308, + "grad_norm": 37.4359016418457, + "learning_rate": 5e-06, + "loss": 1.7138, + "num_input_tokens_seen": 55473204, + "step": 439 + }, + { + "epoch": 0.11261463477201308, + "loss": 1.5784733295440674, + "loss_ce": 0.002301404718309641, + "loss_iou": 0.734375, + "loss_num": 0.02197265625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 55473204, + "step": 439 + }, + { + "epoch": 0.11287116013595844, + "grad_norm": 41.40851593017578, + "learning_rate": 5e-06, + "loss": 1.7172, + "num_input_tokens_seen": 55598892, + "step": 440 + }, + { + "epoch": 0.11287116013595844, + "loss": 1.5968191623687744, + "loss_ce": 0.0020925644785165787, + "loss_iou": 0.73828125, + "loss_num": 0.024169921875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 55598892, + "step": 440 + }, + { + "epoch": 0.1131276854999038, + "grad_norm": 68.53234100341797, + "learning_rate": 5e-06, + "loss": 1.9479, + "num_input_tokens_seen": 55725876, + "step": 441 + }, + { + "epoch": 0.1131276854999038, + "loss": 2.1297075748443604, + "loss_ce": 0.006660687271505594, + "loss_iou": 0.95703125, + "loss_num": 0.0419921875, + "loss_xval": 2.125, + "num_input_tokens_seen": 55725876, + "step": 441 + }, + { + "epoch": 0.11338421086384916, + "grad_norm": 49.62549591064453, + "learning_rate": 5e-06, + "loss": 1.8303, + "num_input_tokens_seen": 55851640, + "step": 442 + }, + { + "epoch": 0.11338421086384916, + "loss": 1.8788096904754639, + "loss_ce": 0.004786320962011814, + "loss_iou": 0.8515625, + "loss_num": 0.034912109375, + "loss_xval": 1.875, + "num_input_tokens_seen": 55851640, + "step": 442 + }, + { + "epoch": 0.11364073622779453, + "grad_norm": 26.42085838317871, + "learning_rate": 5e-06, + "loss": 1.7496, + "num_input_tokens_seen": 55978172, + "step": 443 + }, + { + "epoch": 0.11364073622779453, + "loss": 1.6828863620758057, + "loss_ce": 0.0031988569535315037, + "loss_iou": 0.765625, + "loss_num": 0.029296875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 55978172, + "step": 443 + }, + { + "epoch": 0.11389726159173988, + "grad_norm": 25.665645599365234, + "learning_rate": 5e-06, + "loss": 1.591, + "num_input_tokens_seen": 56104104, + "step": 444 + }, + { + "epoch": 0.11389726159173988, + "loss": 1.474023461341858, + "loss_ce": 0.0013672173954546452, + "loss_iou": 0.6953125, + "loss_num": 0.017333984375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 56104104, + "step": 444 + }, + { + "epoch": 0.11415378695568525, + "grad_norm": 50.49810791015625, + "learning_rate": 5e-06, + "loss": 1.7112, + "num_input_tokens_seen": 56230512, + "step": 445 + }, + { + "epoch": 0.11415378695568525, + "loss": 1.6772782802581787, + "loss_ce": 0.003450163174420595, + "loss_iou": 0.76953125, + "loss_num": 0.0277099609375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 56230512, + "step": 445 + }, + { + "epoch": 0.1144103123196306, + "grad_norm": 48.773704528808594, + "learning_rate": 5e-06, + "loss": 1.8513, + "num_input_tokens_seen": 56357040, + "step": 446 + }, + { + "epoch": 0.1144103123196306, + "loss": 1.8029847145080566, + "loss_ce": 0.0022033960558474064, + "loss_iou": 0.8203125, + "loss_num": 0.03271484375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 56357040, + "step": 446 + }, + { + "epoch": 0.11466683768357597, + "grad_norm": 49.184810638427734, + "learning_rate": 5e-06, + "loss": 1.7231, + "num_input_tokens_seen": 56483184, + "step": 447 + }, + { + "epoch": 0.11466683768357597, + "loss": 1.6470956802368164, + "loss_ce": 0.00354099553078413, + "loss_iou": 0.765625, + "loss_num": 0.02197265625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 56483184, + "step": 447 + }, + { + "epoch": 0.11492336304752132, + "grad_norm": 43.94487380981445, + "learning_rate": 5e-06, + "loss": 1.8633, + "num_input_tokens_seen": 56609512, + "step": 448 + }, + { + "epoch": 0.11492336304752132, + "loss": 1.7870070934295654, + "loss_ce": 0.001850806176662445, + "loss_iou": 0.8203125, + "loss_num": 0.02978515625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 56609512, + "step": 448 + }, + { + "epoch": 0.11517988841146669, + "grad_norm": 38.418453216552734, + "learning_rate": 5e-06, + "loss": 1.8474, + "num_input_tokens_seen": 56735516, + "step": 449 + }, + { + "epoch": 0.11517988841146669, + "loss": 1.7076292037963867, + "loss_ce": 0.0025511044077575207, + "loss_iou": 0.77734375, + "loss_num": 0.0301513671875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 56735516, + "step": 449 + }, + { + "epoch": 0.11543641377541204, + "grad_norm": 36.228641510009766, + "learning_rate": 5e-06, + "loss": 1.6306, + "num_input_tokens_seen": 56861820, + "step": 450 + }, + { + "epoch": 0.11543641377541204, + "loss": 1.7181226015090942, + "loss_ce": 0.0032789018005132675, + "loss_iou": 0.796875, + "loss_num": 0.0242919921875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 56861820, + "step": 450 + }, + { + "epoch": 0.11569293913935741, + "grad_norm": 72.03665924072266, + "learning_rate": 5e-06, + "loss": 1.8012, + "num_input_tokens_seen": 56988372, + "step": 451 + }, + { + "epoch": 0.11569293913935741, + "loss": 1.6160550117492676, + "loss_ce": 0.0008206400088965893, + "loss_iou": 0.76171875, + "loss_num": 0.017822265625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 56988372, + "step": 451 + }, + { + "epoch": 0.11594946450330276, + "grad_norm": 36.3841552734375, + "learning_rate": 5e-06, + "loss": 1.9981, + "num_input_tokens_seen": 57114552, + "step": 452 + }, + { + "epoch": 0.11594946450330276, + "loss": 2.025315761566162, + "loss_ce": 0.001878279261291027, + "loss_iou": 0.921875, + "loss_num": 0.03662109375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 57114552, + "step": 452 + }, + { + "epoch": 0.11620598986724813, + "grad_norm": 27.53243064880371, + "learning_rate": 5e-06, + "loss": 1.7284, + "num_input_tokens_seen": 57239516, + "step": 453 + }, + { + "epoch": 0.11620598986724813, + "loss": 1.6692149639129639, + "loss_ce": 0.0012461625738069415, + "loss_iou": 0.7578125, + "loss_num": 0.0311279296875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 57239516, + "step": 453 + }, + { + "epoch": 0.11646251523119348, + "grad_norm": 30.878704071044922, + "learning_rate": 5e-06, + "loss": 1.725, + "num_input_tokens_seen": 57365124, + "step": 454 + }, + { + "epoch": 0.11646251523119348, + "loss": 1.6984126567840576, + "loss_ce": 0.004076790995895863, + "loss_iou": 0.76171875, + "loss_num": 0.0341796875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 57365124, + "step": 454 + }, + { + "epoch": 0.11671904059513885, + "grad_norm": 95.86900329589844, + "learning_rate": 5e-06, + "loss": 1.6021, + "num_input_tokens_seen": 57492680, + "step": 455 + }, + { + "epoch": 0.11671904059513885, + "loss": 1.5831823348999023, + "loss_ce": 0.0011511017801240087, + "loss_iou": 0.73828125, + "loss_num": 0.020751953125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 57492680, + "step": 455 + }, + { + "epoch": 0.1169755659590842, + "grad_norm": 37.60855484008789, + "learning_rate": 5e-06, + "loss": 2.0232, + "num_input_tokens_seen": 57618624, + "step": 456 + }, + { + "epoch": 0.1169755659590842, + "loss": 2.0790798664093018, + "loss_ce": 0.005837695673108101, + "loss_iou": 0.92578125, + "loss_num": 0.044677734375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 57618624, + "step": 456 + }, + { + "epoch": 0.11723209132302956, + "grad_norm": 33.16410446166992, + "learning_rate": 5e-06, + "loss": 1.885, + "num_input_tokens_seen": 57744172, + "step": 457 + }, + { + "epoch": 0.11723209132302956, + "loss": 1.9857099056243896, + "loss_ce": 0.00914732925593853, + "loss_iou": 0.875, + "loss_num": 0.044921875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 57744172, + "step": 457 + }, + { + "epoch": 0.11748861668697493, + "grad_norm": 29.640005111694336, + "learning_rate": 5e-06, + "loss": 1.7067, + "num_input_tokens_seen": 57870072, + "step": 458 + }, + { + "epoch": 0.11748861668697493, + "loss": 1.7805681228637695, + "loss_ce": 0.00615404499694705, + "loss_iou": 0.8203125, + "loss_num": 0.0263671875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 57870072, + "step": 458 + }, + { + "epoch": 0.11774514205092028, + "grad_norm": 39.27460479736328, + "learning_rate": 5e-06, + "loss": 1.7509, + "num_input_tokens_seen": 57995948, + "step": 459 + }, + { + "epoch": 0.11774514205092028, + "loss": 1.7685242891311646, + "loss_ce": 0.0038758430164307356, + "loss_iou": 0.796875, + "loss_num": 0.034423828125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 57995948, + "step": 459 + }, + { + "epoch": 0.11800166741486565, + "grad_norm": 63.454078674316406, + "learning_rate": 5e-06, + "loss": 1.7184, + "num_input_tokens_seen": 58122476, + "step": 460 + }, + { + "epoch": 0.11800166741486565, + "loss": 1.7562344074249268, + "loss_ce": 0.0052577354945242405, + "loss_iou": 0.79296875, + "loss_num": 0.033203125, + "loss_xval": 1.75, + "num_input_tokens_seen": 58122476, + "step": 460 + }, + { + "epoch": 0.118258192778811, + "grad_norm": 36.77939224243164, + "learning_rate": 5e-06, + "loss": 1.9286, + "num_input_tokens_seen": 58248364, + "step": 461 + }, + { + "epoch": 0.118258192778811, + "loss": 1.8818247318267822, + "loss_ce": 0.0019419132731854916, + "loss_iou": 0.84765625, + "loss_num": 0.037841796875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 58248364, + "step": 461 + }, + { + "epoch": 0.11851471814275637, + "grad_norm": 48.50825881958008, + "learning_rate": 5e-06, + "loss": 1.7753, + "num_input_tokens_seen": 58374284, + "step": 462 + }, + { + "epoch": 0.11851471814275637, + "loss": 1.9311033487319946, + "loss_ce": 0.0014157816767692566, + "loss_iou": 0.86328125, + "loss_num": 0.039794921875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 58374284, + "step": 462 + }, + { + "epoch": 0.11877124350670172, + "grad_norm": 69.14360809326172, + "learning_rate": 5e-06, + "loss": 1.655, + "num_input_tokens_seen": 58500824, + "step": 463 + }, + { + "epoch": 0.11877124350670172, + "loss": 1.7856534719467163, + "loss_ce": 0.003426934825256467, + "loss_iou": 0.81640625, + "loss_num": 0.0294189453125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 58500824, + "step": 463 + }, + { + "epoch": 0.11902776887064709, + "grad_norm": 66.36541748046875, + "learning_rate": 5e-06, + "loss": 1.721, + "num_input_tokens_seen": 58626708, + "step": 464 + }, + { + "epoch": 0.11902776887064709, + "loss": 1.638522982597351, + "loss_ce": 0.001804242143407464, + "loss_iou": 0.765625, + "loss_num": 0.020751953125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 58626708, + "step": 464 + }, + { + "epoch": 0.11928429423459244, + "grad_norm": 51.17401123046875, + "learning_rate": 5e-06, + "loss": 1.6, + "num_input_tokens_seen": 58752764, + "step": 465 + }, + { + "epoch": 0.11928429423459244, + "loss": 1.3945918083190918, + "loss_ce": 0.0005488909082487226, + "loss_iou": 0.65234375, + "loss_num": 0.018310546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 58752764, + "step": 465 + }, + { + "epoch": 0.11954081959853781, + "grad_norm": 50.99451446533203, + "learning_rate": 5e-06, + "loss": 1.7114, + "num_input_tokens_seen": 58878700, + "step": 466 + }, + { + "epoch": 0.11954081959853781, + "loss": 1.6561709642410278, + "loss_ce": 0.0008974944357760251, + "loss_iou": 0.7578125, + "loss_num": 0.027587890625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 58878700, + "step": 466 + }, + { + "epoch": 0.11979734496248316, + "grad_norm": 48.29070281982422, + "learning_rate": 5e-06, + "loss": 1.8735, + "num_input_tokens_seen": 59004676, + "step": 467 + }, + { + "epoch": 0.11979734496248316, + "loss": 1.8461523056030273, + "loss_ce": 0.0033788110595196486, + "loss_iou": 0.828125, + "loss_num": 0.037841796875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 59004676, + "step": 467 + }, + { + "epoch": 0.12005387032642853, + "grad_norm": 42.622894287109375, + "learning_rate": 5e-06, + "loss": 1.6268, + "num_input_tokens_seen": 59131396, + "step": 468 + }, + { + "epoch": 0.12005387032642853, + "loss": 1.6658351421356201, + "loss_ce": 0.0017726825317367911, + "loss_iou": 0.75, + "loss_num": 0.032958984375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 59131396, + "step": 468 + }, + { + "epoch": 0.12031039569037388, + "grad_norm": 104.28092193603516, + "learning_rate": 5e-06, + "loss": 1.7876, + "num_input_tokens_seen": 59258152, + "step": 469 + }, + { + "epoch": 0.12031039569037388, + "loss": 1.8054823875427246, + "loss_ce": 0.0017713536508381367, + "loss_iou": 0.84375, + "loss_num": 0.023193359375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 59258152, + "step": 469 + }, + { + "epoch": 0.12056692105431925, + "grad_norm": 38.41648864746094, + "learning_rate": 5e-06, + "loss": 2.07, + "num_input_tokens_seen": 59383784, + "step": 470 + }, + { + "epoch": 0.12056692105431925, + "loss": 2.154125213623047, + "loss_ce": 0.003734491765499115, + "loss_iou": 0.9453125, + "loss_num": 0.052001953125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 59383784, + "step": 470 + }, + { + "epoch": 0.1208234464182646, + "grad_norm": 40.71257400512695, + "learning_rate": 5e-06, + "loss": 1.784, + "num_input_tokens_seen": 59510216, + "step": 471 + }, + { + "epoch": 0.1208234464182646, + "loss": 1.7215721607208252, + "loss_ce": 0.0008690999820828438, + "loss_iou": 0.7890625, + "loss_num": 0.029052734375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 59510216, + "step": 471 + }, + { + "epoch": 0.12107997178220997, + "grad_norm": 43.049556732177734, + "learning_rate": 5e-06, + "loss": 1.7414, + "num_input_tokens_seen": 59636372, + "step": 472 + }, + { + "epoch": 0.12107997178220997, + "loss": 1.6449707746505737, + "loss_ce": 0.01118170004338026, + "loss_iou": 0.75390625, + "loss_num": 0.02587890625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 59636372, + "step": 472 + }, + { + "epoch": 0.12133649714615533, + "grad_norm": 41.08643341064453, + "learning_rate": 5e-06, + "loss": 1.7773, + "num_input_tokens_seen": 59761944, + "step": 473 + }, + { + "epoch": 0.12133649714615533, + "loss": 1.6837382316589355, + "loss_ce": 0.004050835967063904, + "loss_iou": 0.77734375, + "loss_num": 0.025146484375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 59761944, + "step": 473 + }, + { + "epoch": 0.12159302251010069, + "grad_norm": 46.653629302978516, + "learning_rate": 5e-06, + "loss": 1.5975, + "num_input_tokens_seen": 59887824, + "step": 474 + }, + { + "epoch": 0.12159302251010069, + "loss": 1.5279282331466675, + "loss_ce": 0.0005844776169396937, + "loss_iou": 0.70703125, + "loss_num": 0.022216796875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 59887824, + "step": 474 + }, + { + "epoch": 0.12184954787404605, + "grad_norm": 50.21329879760742, + "learning_rate": 5e-06, + "loss": 1.6078, + "num_input_tokens_seen": 60013776, + "step": 475 + }, + { + "epoch": 0.12184954787404605, + "loss": 1.5767042636871338, + "loss_ce": 0.0024854273069649935, + "loss_iou": 0.734375, + "loss_num": 0.020751953125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 60013776, + "step": 475 + }, + { + "epoch": 0.1221060732379914, + "grad_norm": 60.5197639465332, + "learning_rate": 5e-06, + "loss": 1.6594, + "num_input_tokens_seen": 60140940, + "step": 476 + }, + { + "epoch": 0.1221060732379914, + "loss": 1.5039000511169434, + "loss_ce": 0.0009703865507617593, + "loss_iou": 0.703125, + "loss_num": 0.0189208984375, + "loss_xval": 1.5, + "num_input_tokens_seen": 60140940, + "step": 476 + }, + { + "epoch": 0.12236259860193677, + "grad_norm": 78.34883880615234, + "learning_rate": 5e-06, + "loss": 1.749, + "num_input_tokens_seen": 60267612, + "step": 477 + }, + { + "epoch": 0.12236259860193677, + "loss": 1.7461729049682617, + "loss_ce": 0.0010556046618148685, + "loss_iou": 0.7890625, + "loss_num": 0.032958984375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 60267612, + "step": 477 + }, + { + "epoch": 0.12261912396588212, + "grad_norm": 41.61360168457031, + "learning_rate": 5e-06, + "loss": 1.8079, + "num_input_tokens_seen": 60394156, + "step": 478 + }, + { + "epoch": 0.12261912396588212, + "loss": 1.9458622932434082, + "loss_ce": 0.0025028635282069445, + "loss_iou": 0.875, + "loss_num": 0.039306640625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 60394156, + "step": 478 + }, + { + "epoch": 0.12287564932982749, + "grad_norm": 28.599647521972656, + "learning_rate": 5e-06, + "loss": 1.6507, + "num_input_tokens_seen": 60520712, + "step": 479 + }, + { + "epoch": 0.12287564932982749, + "loss": 1.728694200515747, + "loss_ce": 0.0021316264756023884, + "loss_iou": 0.7890625, + "loss_num": 0.029052734375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 60520712, + "step": 479 + }, + { + "epoch": 0.12313217469377284, + "grad_norm": 29.31183624267578, + "learning_rate": 5e-06, + "loss": 1.6464, + "num_input_tokens_seen": 60646736, + "step": 480 + }, + { + "epoch": 0.12313217469377284, + "loss": 1.7321536540985107, + "loss_ce": 0.0026613736990839243, + "loss_iou": 0.7890625, + "loss_num": 0.0306396484375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 60646736, + "step": 480 + }, + { + "epoch": 0.12338870005771821, + "grad_norm": 30.173465728759766, + "learning_rate": 5e-06, + "loss": 1.6531, + "num_input_tokens_seen": 60772592, + "step": 481 + }, + { + "epoch": 0.12338870005771821, + "loss": 1.6801327466964722, + "loss_ce": 0.003374944906681776, + "loss_iou": 0.76171875, + "loss_num": 0.031005859375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 60772592, + "step": 481 + }, + { + "epoch": 0.12364522542166356, + "grad_norm": 43.21902084350586, + "learning_rate": 5e-06, + "loss": 1.6918, + "num_input_tokens_seen": 60898136, + "step": 482 + }, + { + "epoch": 0.12364522542166356, + "loss": 1.501891016960144, + "loss_ce": 0.000914397242013365, + "loss_iou": 0.6953125, + "loss_num": 0.0216064453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 60898136, + "step": 482 + }, + { + "epoch": 0.12390175078560893, + "grad_norm": 86.57583618164062, + "learning_rate": 5e-06, + "loss": 1.7655, + "num_input_tokens_seen": 61024512, + "step": 483 + }, + { + "epoch": 0.12390175078560893, + "loss": 1.7919373512268066, + "loss_ce": 0.003851282177492976, + "loss_iou": 0.8203125, + "loss_num": 0.029541015625, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 61024512, + "step": 483 + }, + { + "epoch": 0.12415827614955428, + "grad_norm": 44.79195785522461, + "learning_rate": 5e-06, + "loss": 1.7891, + "num_input_tokens_seen": 61150340, + "step": 484 + }, + { + "epoch": 0.12415827614955428, + "loss": 1.7644908428192139, + "loss_ce": 0.002772129839286208, + "loss_iou": 0.80859375, + "loss_num": 0.0281982421875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 61150340, + "step": 484 + }, + { + "epoch": 0.12441480151349965, + "grad_norm": 44.41315460205078, + "learning_rate": 5e-06, + "loss": 1.8381, + "num_input_tokens_seen": 61276012, + "step": 485 + }, + { + "epoch": 0.12441480151349965, + "loss": 1.9642342329025269, + "loss_ce": 0.001343660755082965, + "loss_iou": 0.890625, + "loss_num": 0.037109375, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 61276012, + "step": 485 + }, + { + "epoch": 0.124671326877445, + "grad_norm": 47.64482116699219, + "learning_rate": 5e-06, + "loss": 1.6081, + "num_input_tokens_seen": 61402636, + "step": 486 + }, + { + "epoch": 0.124671326877445, + "loss": 1.72786283493042, + "loss_ce": 0.0013002816122025251, + "loss_iou": 0.78515625, + "loss_num": 0.031494140625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 61402636, + "step": 486 + }, + { + "epoch": 0.12492785224139037, + "grad_norm": 71.87833404541016, + "learning_rate": 5e-06, + "loss": 1.7674, + "num_input_tokens_seen": 61527740, + "step": 487 + }, + { + "epoch": 0.12492785224139037, + "loss": 1.7446229457855225, + "loss_ce": 0.003411897225305438, + "loss_iou": 0.79296875, + "loss_num": 0.030517578125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 61527740, + "step": 487 + }, + { + "epoch": 0.12518437760533574, + "grad_norm": 56.86620330810547, + "learning_rate": 5e-06, + "loss": 1.7852, + "num_input_tokens_seen": 61653716, + "step": 488 + }, + { + "epoch": 0.12518437760533574, + "loss": 1.754368543624878, + "loss_ce": 0.00046220317017287016, + "loss_iou": 0.80859375, + "loss_num": 0.0279541015625, + "loss_xval": 1.75, + "num_input_tokens_seen": 61653716, + "step": 488 + }, + { + "epoch": 0.1254409029692811, + "grad_norm": 30.357770919799805, + "learning_rate": 5e-06, + "loss": 1.6365, + "num_input_tokens_seen": 61780636, + "step": 489 + }, + { + "epoch": 0.1254409029692811, + "loss": 1.6131393909454346, + "loss_ce": 0.003764481283724308, + "loss_iou": 0.734375, + "loss_num": 0.028076171875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 61780636, + "step": 489 + }, + { + "epoch": 0.12569742833322645, + "grad_norm": 32.89668655395508, + "learning_rate": 5e-06, + "loss": 1.4768, + "num_input_tokens_seen": 61906520, + "step": 490 + }, + { + "epoch": 0.12569742833322645, + "loss": 1.5551114082336426, + "loss_ce": 0.002377046039327979, + "loss_iou": 0.71875, + "loss_num": 0.0235595703125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 61906520, + "step": 490 + }, + { + "epoch": 0.1259539536971718, + "grad_norm": 68.21038055419922, + "learning_rate": 5e-06, + "loss": 1.5698, + "num_input_tokens_seen": 62032080, + "step": 491 + }, + { + "epoch": 0.1259539536971718, + "loss": 1.4567121267318726, + "loss_ce": 0.001633996143937111, + "loss_iou": 0.67578125, + "loss_num": 0.0203857421875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 62032080, + "step": 491 + }, + { + "epoch": 0.12621047906111718, + "grad_norm": 54.81508255004883, + "learning_rate": 5e-06, + "loss": 1.6964, + "num_input_tokens_seen": 62158264, + "step": 492 + }, + { + "epoch": 0.12621047906111718, + "loss": 1.7056665420532227, + "loss_ce": 0.0035181199200451374, + "loss_iou": 0.7890625, + "loss_num": 0.024658203125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 62158264, + "step": 492 + }, + { + "epoch": 0.12646700442506253, + "grad_norm": 73.14745330810547, + "learning_rate": 5e-06, + "loss": 1.6376, + "num_input_tokens_seen": 62284376, + "step": 493 + }, + { + "epoch": 0.12646700442506253, + "loss": 1.351789116859436, + "loss_ce": 0.0007149467710405588, + "loss_iou": 0.640625, + "loss_num": 0.01434326171875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 62284376, + "step": 493 + }, + { + "epoch": 0.1267235297890079, + "grad_norm": 92.09954833984375, + "learning_rate": 5e-06, + "loss": 1.6787, + "num_input_tokens_seen": 62411464, + "step": 494 + }, + { + "epoch": 0.1267235297890079, + "loss": 1.664896845817566, + "loss_ce": 0.0008343269000761211, + "loss_iou": 0.78125, + "loss_num": 0.02099609375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 62411464, + "step": 494 + }, + { + "epoch": 0.12698005515295324, + "grad_norm": 45.41442108154297, + "learning_rate": 5e-06, + "loss": 1.8646, + "num_input_tokens_seen": 62537136, + "step": 495 + }, + { + "epoch": 0.12698005515295324, + "loss": 1.741948127746582, + "loss_ce": 0.0007371420506387949, + "loss_iou": 0.80078125, + "loss_num": 0.02783203125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 62537136, + "step": 495 + }, + { + "epoch": 0.1272365805168986, + "grad_norm": 39.136627197265625, + "learning_rate": 5e-06, + "loss": 1.7646, + "num_input_tokens_seen": 62662620, + "step": 496 + }, + { + "epoch": 0.1272365805168986, + "loss": 1.7433350086212158, + "loss_ce": 0.005053797736763954, + "loss_iou": 0.78515625, + "loss_num": 0.033203125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 62662620, + "step": 496 + }, + { + "epoch": 0.12749310588084398, + "grad_norm": 49.318389892578125, + "learning_rate": 5e-06, + "loss": 1.6535, + "num_input_tokens_seen": 62789096, + "step": 497 + }, + { + "epoch": 0.12749310588084398, + "loss": 1.6110996007919312, + "loss_ce": 0.0007480541826225817, + "loss_iou": 0.75390625, + "loss_num": 0.0201416015625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 62789096, + "step": 497 + }, + { + "epoch": 0.12774963124478933, + "grad_norm": 47.046878814697266, + "learning_rate": 5e-06, + "loss": 1.7151, + "num_input_tokens_seen": 62915312, + "step": 498 + }, + { + "epoch": 0.12774963124478933, + "loss": 1.8070034980773926, + "loss_ce": 0.00231599691323936, + "loss_iou": 0.81640625, + "loss_num": 0.03466796875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 62915312, + "step": 498 + }, + { + "epoch": 0.12800615660873468, + "grad_norm": 69.81344604492188, + "learning_rate": 5e-06, + "loss": 1.574, + "num_input_tokens_seen": 63041640, + "step": 499 + }, + { + "epoch": 0.12800615660873468, + "loss": 1.6115577220916748, + "loss_ce": 0.0031593618914484978, + "loss_iou": 0.7421875, + "loss_num": 0.0252685546875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 63041640, + "step": 499 + }, + { + "epoch": 0.12826268197268004, + "grad_norm": 35.509857177734375, + "learning_rate": 5e-06, + "loss": 1.7074, + "num_input_tokens_seen": 63168408, + "step": 500 + }, + { + "epoch": 0.12826268197268004, + "eval_icons_CIoU": -0.08813555911183357, + "eval_icons_GIoU": -0.12102716788649559, + "eval_icons_IoU": 0.1143653392791748, + "eval_icons_MAE_all": 0.059639595448970795, + "eval_icons_MAE_h": 0.0811239741742611, + "eval_icons_MAE_w": 0.07068989798426628, + "eval_icons_MAE_x_boxes": 0.06718512624502182, + "eval_icons_MAE_y_boxes": 0.07723477482795715, + "eval_icons_NUM_probability": 0.9998332858085632, + "eval_icons_inside_bbox": 0.2204861119389534, + "eval_icons_loss": 2.531628370285034, + "eval_icons_loss_ce": 0.0009354916110169142, + "eval_icons_loss_iou": 1.110595703125, + "eval_icons_loss_num": 0.05770111083984375, + "eval_icons_loss_xval": 2.50927734375, + "eval_icons_runtime": 39.9475, + "eval_icons_samples_per_second": 1.252, + "eval_icons_steps_per_second": 0.05, + "num_input_tokens_seen": 63168408, + "step": 500 + }, + { + "epoch": 0.12826268197268004, + "eval_screenspot_CIoU": -0.016426607966423035, + "eval_screenspot_GIoU": -0.04860709219550093, + "eval_screenspot_IoU": 0.17466307679812113, + "eval_screenspot_MAE_all": 0.08095420400301616, + "eval_screenspot_MAE_h": 0.06321993718544643, + "eval_screenspot_MAE_w": 0.14798389126857123, + "eval_screenspot_MAE_x_boxes": 0.11274510622024536, + "eval_screenspot_MAE_y_boxes": 0.04963180422782898, + "eval_screenspot_NUM_probability": 0.999764641125997, + "eval_screenspot_inside_bbox": 0.5541666746139526, + "eval_screenspot_loss": 2.5440590381622314, + "eval_screenspot_loss_ce": 0.005904497268299262, + "eval_screenspot_loss_iou": 1.0734049479166667, + "eval_screenspot_loss_num": 0.08739217122395833, + "eval_screenspot_loss_xval": 2.5843098958333335, + "eval_screenspot_runtime": 68.1776, + "eval_screenspot_samples_per_second": 1.305, + "eval_screenspot_steps_per_second": 0.044, + "num_input_tokens_seen": 63168408, + "step": 500 + }, + { + "epoch": 0.12826268197268004, + "loss": 2.5779953002929688, + "loss_ce": 0.003776472993195057, + "loss_iou": 1.0703125, + "loss_num": 0.0849609375, + "loss_xval": 2.578125, + "num_input_tokens_seen": 63168408, + "step": 500 + }, + { + "epoch": 0.12851920733662542, + "grad_norm": 65.97784423828125, + "learning_rate": 5e-06, + "loss": 1.625, + "num_input_tokens_seen": 63294960, + "step": 501 + }, + { + "epoch": 0.12851920733662542, + "loss": 1.494499683380127, + "loss_ce": 0.0013356480048969388, + "loss_iou": 0.69140625, + "loss_num": 0.021484375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 63294960, + "step": 501 + }, + { + "epoch": 0.12877573270057077, + "grad_norm": 41.564544677734375, + "learning_rate": 5e-06, + "loss": 1.7892, + "num_input_tokens_seen": 63421296, + "step": 502 + }, + { + "epoch": 0.12877573270057077, + "loss": 1.7985565662384033, + "loss_ce": 0.0007051015854813159, + "loss_iou": 0.828125, + "loss_num": 0.028076171875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 63421296, + "step": 502 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 80.64423370361328, + "learning_rate": 5e-06, + "loss": 1.7202, + "num_input_tokens_seen": 63548300, + "step": 503 + }, + { + "epoch": 0.12903225806451613, + "loss": 1.8039515018463135, + "loss_ce": 0.003170343115925789, + "loss_iou": 0.8046875, + "loss_num": 0.03857421875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 63548300, + "step": 503 + }, + { + "epoch": 0.12928878342846148, + "grad_norm": 31.872426986694336, + "learning_rate": 5e-06, + "loss": 1.6057, + "num_input_tokens_seen": 63673988, + "step": 504 + }, + { + "epoch": 0.12928878342846148, + "loss": 1.4717856645584106, + "loss_ce": 0.002059069462120533, + "loss_iou": 0.671875, + "loss_num": 0.0244140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 63673988, + "step": 504 + }, + { + "epoch": 0.12954530879240686, + "grad_norm": 47.289302825927734, + "learning_rate": 5e-06, + "loss": 1.6607, + "num_input_tokens_seen": 63800988, + "step": 505 + }, + { + "epoch": 0.12954530879240686, + "loss": 1.6136938333511353, + "loss_ce": 0.003342278767377138, + "loss_iou": 0.74609375, + "loss_num": 0.0240478515625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 63800988, + "step": 505 + }, + { + "epoch": 0.1298018341563522, + "grad_norm": 38.54035949707031, + "learning_rate": 5e-06, + "loss": 1.5921, + "num_input_tokens_seen": 63927904, + "step": 506 + }, + { + "epoch": 0.1298018341563522, + "loss": 1.4334439039230347, + "loss_ce": 0.0008266839431598783, + "loss_iou": 0.6640625, + "loss_num": 0.0205078125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 63927904, + "step": 506 + }, + { + "epoch": 0.13005835952029757, + "grad_norm": 37.19766616821289, + "learning_rate": 5e-06, + "loss": 1.6291, + "num_input_tokens_seen": 64053800, + "step": 507 + }, + { + "epoch": 0.13005835952029757, + "loss": 1.7088382244110107, + "loss_ce": 0.0018070570658892393, + "loss_iou": 0.78125, + "loss_num": 0.029296875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 64053800, + "step": 507 + }, + { + "epoch": 0.13031488488424292, + "grad_norm": 61.4913215637207, + "learning_rate": 5e-06, + "loss": 1.5872, + "num_input_tokens_seen": 64179416, + "step": 508 + }, + { + "epoch": 0.13031488488424292, + "loss": 1.581200122833252, + "loss_ce": 0.0011220432352274656, + "loss_iou": 0.7265625, + "loss_num": 0.024169921875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 64179416, + "step": 508 + }, + { + "epoch": 0.1305714102481883, + "grad_norm": 42.580650329589844, + "learning_rate": 5e-06, + "loss": 1.6808, + "num_input_tokens_seen": 64304800, + "step": 509 + }, + { + "epoch": 0.1305714102481883, + "loss": 1.6761349439620972, + "loss_ce": 0.00035372647107578814, + "loss_iou": 0.78125, + "loss_num": 0.0233154296875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 64304800, + "step": 509 + }, + { + "epoch": 0.13082793561213366, + "grad_norm": 44.40543746948242, + "learning_rate": 5e-06, + "loss": 1.61, + "num_input_tokens_seen": 64431492, + "step": 510 + }, + { + "epoch": 0.13082793561213366, + "loss": 1.4809927940368652, + "loss_ce": 0.0005239903694018722, + "loss_iou": 0.6953125, + "loss_num": 0.018310546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 64431492, + "step": 510 + }, + { + "epoch": 0.131084460976079, + "grad_norm": 61.31940841674805, + "learning_rate": 5e-06, + "loss": 1.7046, + "num_input_tokens_seen": 64557616, + "step": 511 + }, + { + "epoch": 0.131084460976079, + "loss": 1.811378002166748, + "loss_ce": 0.003760798368602991, + "loss_iou": 0.83203125, + "loss_num": 0.0291748046875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 64557616, + "step": 511 + }, + { + "epoch": 0.13134098634002436, + "grad_norm": 68.12958526611328, + "learning_rate": 5e-06, + "loss": 1.6861, + "num_input_tokens_seen": 64683780, + "step": 512 + }, + { + "epoch": 0.13134098634002436, + "loss": 1.6338921785354614, + "loss_ce": 0.006939140148460865, + "loss_iou": 0.765625, + "loss_num": 0.01953125, + "loss_xval": 1.625, + "num_input_tokens_seen": 64683780, + "step": 512 + }, + { + "epoch": 0.13159751170396972, + "grad_norm": 40.787376403808594, + "learning_rate": 5e-06, + "loss": 1.8374, + "num_input_tokens_seen": 64811172, + "step": 513 + }, + { + "epoch": 0.13159751170396972, + "loss": 1.9357507228851318, + "loss_ce": 0.0021569635719060898, + "loss_iou": 0.875, + "loss_num": 0.035888671875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 64811172, + "step": 513 + }, + { + "epoch": 0.1318540370679151, + "grad_norm": 87.03521728515625, + "learning_rate": 5e-06, + "loss": 1.6736, + "num_input_tokens_seen": 64938304, + "step": 514 + }, + { + "epoch": 0.1318540370679151, + "loss": 1.6472301483154297, + "loss_ce": 0.001722260843962431, + "loss_iou": 0.74609375, + "loss_num": 0.0299072265625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 64938304, + "step": 514 + }, + { + "epoch": 0.13211056243186045, + "grad_norm": 35.112728118896484, + "learning_rate": 5e-06, + "loss": 1.6916, + "num_input_tokens_seen": 65064720, + "step": 515 + }, + { + "epoch": 0.13211056243186045, + "loss": 1.6961196660995483, + "loss_ce": 0.0008071529446169734, + "loss_iou": 0.78125, + "loss_num": 0.02685546875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 65064720, + "step": 515 + }, + { + "epoch": 0.1323670877958058, + "grad_norm": 47.61723709106445, + "learning_rate": 5e-06, + "loss": 1.6344, + "num_input_tokens_seen": 65190608, + "step": 516 + }, + { + "epoch": 0.1323670877958058, + "loss": 1.5302079916000366, + "loss_ce": 0.0038408292457461357, + "loss_iou": 0.69921875, + "loss_num": 0.0260009765625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 65190608, + "step": 516 + }, + { + "epoch": 0.13262361315975116, + "grad_norm": 44.02301788330078, + "learning_rate": 5e-06, + "loss": 1.6758, + "num_input_tokens_seen": 65317176, + "step": 517 + }, + { + "epoch": 0.13262361315975116, + "loss": 1.7421395778656006, + "loss_ce": 0.002881779335439205, + "loss_iou": 0.796875, + "loss_num": 0.02978515625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 65317176, + "step": 517 + }, + { + "epoch": 0.13288013852369654, + "grad_norm": 88.42464447021484, + "learning_rate": 5e-06, + "loss": 1.6965, + "num_input_tokens_seen": 65443400, + "step": 518 + }, + { + "epoch": 0.13288013852369654, + "loss": 1.682906985282898, + "loss_ce": 0.0022428741212934256, + "loss_iou": 0.78125, + "loss_num": 0.0245361328125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 65443400, + "step": 518 + }, + { + "epoch": 0.1331366638876419, + "grad_norm": 46.00665283203125, + "learning_rate": 5e-06, + "loss": 1.8612, + "num_input_tokens_seen": 65569896, + "step": 519 + }, + { + "epoch": 0.1331366638876419, + "loss": 1.9804153442382812, + "loss_ce": 0.005805996246635914, + "loss_iou": 0.890625, + "loss_num": 0.039794921875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 65569896, + "step": 519 + }, + { + "epoch": 0.13339318925158725, + "grad_norm": 37.248634338378906, + "learning_rate": 5e-06, + "loss": 1.6576, + "num_input_tokens_seen": 65696472, + "step": 520 + }, + { + "epoch": 0.13339318925158725, + "loss": 1.6467912197113037, + "loss_ce": 0.0022599489893764257, + "loss_iou": 0.75390625, + "loss_num": 0.02783203125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 65696472, + "step": 520 + }, + { + "epoch": 0.1336497146155326, + "grad_norm": 40.424705505371094, + "learning_rate": 5e-06, + "loss": 1.6382, + "num_input_tokens_seen": 65822092, + "step": 521 + }, + { + "epoch": 0.1336497146155326, + "loss": 1.8611526489257812, + "loss_ce": 0.0017776766326278448, + "loss_iou": 0.83984375, + "loss_num": 0.036865234375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 65822092, + "step": 521 + }, + { + "epoch": 0.13390623997947798, + "grad_norm": 103.97928619384766, + "learning_rate": 5e-06, + "loss": 1.7141, + "num_input_tokens_seen": 65948720, + "step": 522 + }, + { + "epoch": 0.13390623997947798, + "loss": 1.7977899312973022, + "loss_ce": 0.001891533494926989, + "loss_iou": 0.83984375, + "loss_num": 0.023681640625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 65948720, + "step": 522 + }, + { + "epoch": 0.13416276534342333, + "grad_norm": 37.5026741027832, + "learning_rate": 5e-06, + "loss": 1.9319, + "num_input_tokens_seen": 66074208, + "step": 523 + }, + { + "epoch": 0.13416276534342333, + "loss": 1.9987287521362305, + "loss_ce": 0.004588027019053698, + "loss_iou": 0.890625, + "loss_num": 0.04248046875, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 66074208, + "step": 523 + }, + { + "epoch": 0.1344192907073687, + "grad_norm": 37.2819709777832, + "learning_rate": 5e-06, + "loss": 1.7502, + "num_input_tokens_seen": 66200624, + "step": 524 + }, + { + "epoch": 0.1344192907073687, + "loss": 1.7647587060928345, + "loss_ce": 0.005969722755253315, + "loss_iou": 0.80078125, + "loss_num": 0.03173828125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 66200624, + "step": 524 + }, + { + "epoch": 0.13467581607131404, + "grad_norm": 30.838050842285156, + "learning_rate": 5e-06, + "loss": 1.5776, + "num_input_tokens_seen": 66325964, + "step": 525 + }, + { + "epoch": 0.13467581607131404, + "loss": 1.5363616943359375, + "loss_ce": 0.0021819553803652525, + "loss_iou": 0.71484375, + "loss_num": 0.0218505859375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 66325964, + "step": 525 + }, + { + "epoch": 0.13493234143525942, + "grad_norm": 40.01436996459961, + "learning_rate": 5e-06, + "loss": 1.6046, + "num_input_tokens_seen": 66452996, + "step": 526 + }, + { + "epoch": 0.13493234143525942, + "loss": 1.6087982654571533, + "loss_ce": 0.0018647679826244712, + "loss_iou": 0.72265625, + "loss_num": 0.0322265625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 66452996, + "step": 526 + }, + { + "epoch": 0.13518886679920478, + "grad_norm": 59.87229537963867, + "learning_rate": 5e-06, + "loss": 1.5952, + "num_input_tokens_seen": 66579460, + "step": 527 + }, + { + "epoch": 0.13518886679920478, + "loss": 1.6056694984436035, + "loss_ce": 0.002153842244297266, + "loss_iou": 0.734375, + "loss_num": 0.0272216796875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 66579460, + "step": 527 + }, + { + "epoch": 0.13544539216315013, + "grad_norm": 80.17083740234375, + "learning_rate": 5e-06, + "loss": 1.7347, + "num_input_tokens_seen": 66706472, + "step": 528 + }, + { + "epoch": 0.13544539216315013, + "loss": 1.702104091644287, + "loss_ce": 0.00483840424567461, + "loss_iou": 0.78125, + "loss_num": 0.02734375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 66706472, + "step": 528 + }, + { + "epoch": 0.13570191752709548, + "grad_norm": 54.53982925415039, + "learning_rate": 5e-06, + "loss": 1.7021, + "num_input_tokens_seen": 66832824, + "step": 529 + }, + { + "epoch": 0.13570191752709548, + "loss": 1.6209640502929688, + "loss_ce": 0.0018235087627545, + "loss_iou": 0.7578125, + "loss_num": 0.021484375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 66832824, + "step": 529 + }, + { + "epoch": 0.13595844289104086, + "grad_norm": 42.349605560302734, + "learning_rate": 5e-06, + "loss": 1.8149, + "num_input_tokens_seen": 66959856, + "step": 530 + }, + { + "epoch": 0.13595844289104086, + "loss": 1.6692118644714355, + "loss_ce": 0.0031962350476533175, + "loss_iou": 0.7578125, + "loss_num": 0.0296630859375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 66959856, + "step": 530 + }, + { + "epoch": 0.13621496825498622, + "grad_norm": 68.3973617553711, + "learning_rate": 5e-06, + "loss": 1.5381, + "num_input_tokens_seen": 67086300, + "step": 531 + }, + { + "epoch": 0.13621496825498622, + "loss": 1.566171407699585, + "loss_ce": 0.0007417317247018218, + "loss_iou": 0.72265625, + "loss_num": 0.0247802734375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 67086300, + "step": 531 + }, + { + "epoch": 0.13647149361893157, + "grad_norm": 45.78357696533203, + "learning_rate": 5e-06, + "loss": 1.5934, + "num_input_tokens_seen": 67211980, + "step": 532 + }, + { + "epoch": 0.13647149361893157, + "loss": 1.4631876945495605, + "loss_ce": 0.0012736708158627152, + "loss_iou": 0.68359375, + "loss_num": 0.01904296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 67211980, + "step": 532 + }, + { + "epoch": 0.13672801898287693, + "grad_norm": 85.82099151611328, + "learning_rate": 5e-06, + "loss": 1.8069, + "num_input_tokens_seen": 67337608, + "step": 533 + }, + { + "epoch": 0.13672801898287693, + "loss": 2.0936241149902344, + "loss_ce": 0.005733604542911053, + "loss_iou": 0.9375, + "loss_num": 0.042724609375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 67337608, + "step": 533 + }, + { + "epoch": 0.13698454434682228, + "grad_norm": 39.99359130859375, + "learning_rate": 5e-06, + "loss": 1.865, + "num_input_tokens_seen": 67463704, + "step": 534 + }, + { + "epoch": 0.13698454434682228, + "loss": 1.8516842126846313, + "loss_ce": 0.003051486797630787, + "loss_iou": 0.83203125, + "loss_num": 0.03759765625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 67463704, + "step": 534 + }, + { + "epoch": 0.13724106971076766, + "grad_norm": 47.250640869140625, + "learning_rate": 5e-06, + "loss": 1.7352, + "num_input_tokens_seen": 67588860, + "step": 535 + }, + { + "epoch": 0.13724106971076766, + "loss": 1.7179440259933472, + "loss_ce": 0.0031003328040242195, + "loss_iou": 0.7734375, + "loss_num": 0.0341796875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 67588860, + "step": 535 + }, + { + "epoch": 0.137497595074713, + "grad_norm": 36.00251770019531, + "learning_rate": 5e-06, + "loss": 1.7124, + "num_input_tokens_seen": 67714920, + "step": 536 + }, + { + "epoch": 0.137497595074713, + "loss": 1.6225531101226807, + "loss_ce": 0.0014592884108424187, + "loss_iou": 0.734375, + "loss_num": 0.029541015625, + "loss_xval": 1.625, + "num_input_tokens_seen": 67714920, + "step": 536 + }, + { + "epoch": 0.13775412043865837, + "grad_norm": 94.15619659423828, + "learning_rate": 5e-06, + "loss": 1.6898, + "num_input_tokens_seen": 67841600, + "step": 537 + }, + { + "epoch": 0.13775412043865837, + "loss": 1.5061266422271729, + "loss_ce": 0.0012437943369150162, + "loss_iou": 0.7109375, + "loss_num": 0.016845703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 67841600, + "step": 537 + }, + { + "epoch": 0.13801064580260372, + "grad_norm": 40.61013412475586, + "learning_rate": 5e-06, + "loss": 1.8367, + "num_input_tokens_seen": 67968416, + "step": 538 + }, + { + "epoch": 0.13801064580260372, + "loss": 1.8107746839523315, + "loss_ce": 0.004134010057896376, + "loss_iou": 0.8046875, + "loss_num": 0.0390625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 67968416, + "step": 538 + }, + { + "epoch": 0.1382671711665491, + "grad_norm": 43.246856689453125, + "learning_rate": 5e-06, + "loss": 1.6373, + "num_input_tokens_seen": 68094916, + "step": 539 + }, + { + "epoch": 0.1382671711665491, + "loss": 1.5112301111221313, + "loss_ce": 0.00048792368033900857, + "loss_iou": 0.69140625, + "loss_num": 0.026123046875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 68094916, + "step": 539 + }, + { + "epoch": 0.13852369653049446, + "grad_norm": 49.97499084472656, + "learning_rate": 5e-06, + "loss": 1.5353, + "num_input_tokens_seen": 68222392, + "step": 540 + }, + { + "epoch": 0.13852369653049446, + "loss": 1.468103289604187, + "loss_ce": 0.0022829363588243723, + "loss_iou": 0.69140625, + "loss_num": 0.0162353515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 68222392, + "step": 540 + }, + { + "epoch": 0.1387802218944398, + "grad_norm": 63.00365447998047, + "learning_rate": 5e-06, + "loss": 1.6641, + "num_input_tokens_seen": 68348700, + "step": 541 + }, + { + "epoch": 0.1387802218944398, + "loss": 1.7213560342788696, + "loss_ce": 0.005535767413675785, + "loss_iou": 0.76953125, + "loss_num": 0.03515625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 68348700, + "step": 541 + }, + { + "epoch": 0.13903674725838516, + "grad_norm": 49.11519241333008, + "learning_rate": 5e-06, + "loss": 1.6342, + "num_input_tokens_seen": 68476076, + "step": 542 + }, + { + "epoch": 0.13903674725838516, + "loss": 1.5131175518035889, + "loss_ce": 0.0023754204157739878, + "loss_iou": 0.6953125, + "loss_num": 0.0235595703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 68476076, + "step": 542 + }, + { + "epoch": 0.13929327262233054, + "grad_norm": 50.91838836669922, + "learning_rate": 5e-06, + "loss": 1.4742, + "num_input_tokens_seen": 68601968, + "step": 543 + }, + { + "epoch": 0.13929327262233054, + "loss": 1.2956805229187012, + "loss_ce": 0.001735169906169176, + "loss_iou": 0.609375, + "loss_num": 0.0150146484375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 68601968, + "step": 543 + }, + { + "epoch": 0.1395497979862759, + "grad_norm": 46.84491729736328, + "learning_rate": 5e-06, + "loss": 1.5889, + "num_input_tokens_seen": 68728448, + "step": 544 + }, + { + "epoch": 0.1395497979862759, + "loss": 1.4698936939239502, + "loss_ce": 0.003096894593909383, + "loss_iou": 0.6796875, + "loss_num": 0.02099609375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 68728448, + "step": 544 + }, + { + "epoch": 0.13980632335022125, + "grad_norm": 92.95574951171875, + "learning_rate": 5e-06, + "loss": 1.5825, + "num_input_tokens_seen": 68854304, + "step": 545 + }, + { + "epoch": 0.13980632335022125, + "loss": 1.6145811080932617, + "loss_ce": 0.0022763521410524845, + "loss_iou": 0.75390625, + "loss_num": 0.021728515625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 68854304, + "step": 545 + }, + { + "epoch": 0.1400628487141666, + "grad_norm": 41.20380783081055, + "learning_rate": 5e-06, + "loss": 1.8984, + "num_input_tokens_seen": 68981032, + "step": 546 + }, + { + "epoch": 0.1400628487141666, + "loss": 2.0746302604675293, + "loss_ce": 0.0023645502515137196, + "loss_iou": 0.90625, + "loss_num": 0.052734375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 68981032, + "step": 546 + }, + { + "epoch": 0.14031937407811199, + "grad_norm": 49.50692367553711, + "learning_rate": 5e-06, + "loss": 1.6751, + "num_input_tokens_seen": 69107184, + "step": 547 + }, + { + "epoch": 0.14031937407811199, + "loss": 1.585601568222046, + "loss_ce": 0.0006407101755030453, + "loss_iou": 0.7265625, + "loss_num": 0.02587890625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 69107184, + "step": 547 + }, + { + "epoch": 0.14057589944205734, + "grad_norm": 53.43033981323242, + "learning_rate": 5e-06, + "loss": 1.3986, + "num_input_tokens_seen": 69233372, + "step": 548 + }, + { + "epoch": 0.14057589944205734, + "loss": 1.3398373126983643, + "loss_ce": 0.0009700690279714763, + "loss_iou": 0.62890625, + "loss_num": 0.01519775390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 69233372, + "step": 548 + }, + { + "epoch": 0.1408324248060027, + "grad_norm": 56.18838119506836, + "learning_rate": 5e-06, + "loss": 1.5654, + "num_input_tokens_seen": 69360428, + "step": 549 + }, + { + "epoch": 0.1408324248060027, + "loss": 1.522639513015747, + "loss_ce": 0.0031081875786185265, + "loss_iou": 0.703125, + "loss_num": 0.0223388671875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 69360428, + "step": 549 + }, + { + "epoch": 0.14108895016994805, + "grad_norm": 107.62277221679688, + "learning_rate": 5e-06, + "loss": 1.5514, + "num_input_tokens_seen": 69487312, + "step": 550 + }, + { + "epoch": 0.14108895016994805, + "loss": 1.5685968399047852, + "loss_ce": 0.005120271351188421, + "loss_iou": 0.734375, + "loss_num": 0.019287109375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 69487312, + "step": 550 + }, + { + "epoch": 0.1413454755338934, + "grad_norm": 42.770843505859375, + "learning_rate": 5e-06, + "loss": 1.9004, + "num_input_tokens_seen": 69615580, + "step": 551 + }, + { + "epoch": 0.1413454755338934, + "loss": 1.8985214233398438, + "loss_ce": 0.003013565670698881, + "loss_iou": 0.84765625, + "loss_num": 0.039306640625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 69615580, + "step": 551 + }, + { + "epoch": 0.14160200089783878, + "grad_norm": 43.04829788208008, + "learning_rate": 5e-06, + "loss": 1.7076, + "num_input_tokens_seen": 69741496, + "step": 552 + }, + { + "epoch": 0.14160200089783878, + "loss": 1.757059097290039, + "loss_ce": 0.006082567851990461, + "loss_iou": 0.78125, + "loss_num": 0.037353515625, + "loss_xval": 1.75, + "num_input_tokens_seen": 69741496, + "step": 552 + }, + { + "epoch": 0.14185852626178413, + "grad_norm": 49.3670539855957, + "learning_rate": 5e-06, + "loss": 1.4928, + "num_input_tokens_seen": 69869180, + "step": 553 + }, + { + "epoch": 0.14185852626178413, + "loss": 1.416538953781128, + "loss_ce": 0.0005233290721662343, + "loss_iou": 0.6640625, + "loss_num": 0.018310546875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 69869180, + "step": 553 + }, + { + "epoch": 0.1421150516257295, + "grad_norm": 45.51365280151367, + "learning_rate": 5e-06, + "loss": 1.5454, + "num_input_tokens_seen": 69994652, + "step": 554 + }, + { + "epoch": 0.1421150516257295, + "loss": 1.473512887954712, + "loss_ce": 0.000368429406080395, + "loss_iou": 0.69140625, + "loss_num": 0.0177001953125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 69994652, + "step": 554 + }, + { + "epoch": 0.14237157698967484, + "grad_norm": 67.81134033203125, + "learning_rate": 5e-06, + "loss": 1.482, + "num_input_tokens_seen": 70120604, + "step": 555 + }, + { + "epoch": 0.14237157698967484, + "loss": 1.3690364360809326, + "loss_ce": 0.0008723422652110457, + "loss_iou": 0.640625, + "loss_num": 0.0174560546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 70120604, + "step": 555 + }, + { + "epoch": 0.14262810235362022, + "grad_norm": 57.1545295715332, + "learning_rate": 5e-06, + "loss": 1.8011, + "num_input_tokens_seen": 70246764, + "step": 556 + }, + { + "epoch": 0.14262810235362022, + "loss": 1.6394314765930176, + "loss_ce": 0.002712684217840433, + "loss_iou": 0.7578125, + "loss_num": 0.0247802734375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 70246764, + "step": 556 + }, + { + "epoch": 0.14288462771756558, + "grad_norm": 46.878238677978516, + "learning_rate": 5e-06, + "loss": 1.6197, + "num_input_tokens_seen": 70370944, + "step": 557 + }, + { + "epoch": 0.14288462771756558, + "loss": 1.8017077445983887, + "loss_ce": 0.004832741804420948, + "loss_iou": 0.8125, + "loss_num": 0.03466796875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 70370944, + "step": 557 + }, + { + "epoch": 0.14314115308151093, + "grad_norm": 58.380619049072266, + "learning_rate": 5e-06, + "loss": 1.5898, + "num_input_tokens_seen": 70496972, + "step": 558 + }, + { + "epoch": 0.14314115308151093, + "loss": 1.4451154470443726, + "loss_ce": 0.0017561402637511492, + "loss_iou": 0.671875, + "loss_num": 0.0205078125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 70496972, + "step": 558 + }, + { + "epoch": 0.14339767844545628, + "grad_norm": 51.696044921875, + "learning_rate": 5e-06, + "loss": 1.711, + "num_input_tokens_seen": 70623052, + "step": 559 + }, + { + "epoch": 0.14339767844545628, + "loss": 1.839169979095459, + "loss_ce": 0.003232384566217661, + "loss_iou": 0.83203125, + "loss_num": 0.033447265625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 70623052, + "step": 559 + }, + { + "epoch": 0.14365420380940166, + "grad_norm": 76.79081726074219, + "learning_rate": 5e-06, + "loss": 1.5567, + "num_input_tokens_seen": 70748948, + "step": 560 + }, + { + "epoch": 0.14365420380940166, + "loss": 1.336108684539795, + "loss_ce": 0.0011477943044155836, + "loss_iou": 0.62890625, + "loss_num": 0.01513671875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 70748948, + "step": 560 + }, + { + "epoch": 0.14391072917334702, + "grad_norm": 43.05482482910156, + "learning_rate": 5e-06, + "loss": 1.5965, + "num_input_tokens_seen": 70875008, + "step": 561 + }, + { + "epoch": 0.14391072917334702, + "loss": 1.5724600553512573, + "loss_ce": 0.0011710493126884103, + "loss_iou": 0.71875, + "loss_num": 0.027587890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 70875008, + "step": 561 + }, + { + "epoch": 0.14416725453729237, + "grad_norm": 56.391578674316406, + "learning_rate": 5e-06, + "loss": 1.5623, + "num_input_tokens_seen": 71001072, + "step": 562 + }, + { + "epoch": 0.14416725453729237, + "loss": 1.5528550148010254, + "loss_ce": 0.0020737135782837868, + "loss_iou": 0.72265625, + "loss_num": 0.02099609375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 71001072, + "step": 562 + }, + { + "epoch": 0.14442377990123773, + "grad_norm": 46.05347442626953, + "learning_rate": 5e-06, + "loss": 1.7065, + "num_input_tokens_seen": 71126932, + "step": 563 + }, + { + "epoch": 0.14442377990123773, + "loss": 1.6381909847259521, + "loss_ce": 0.0014722333289682865, + "loss_iou": 0.7578125, + "loss_num": 0.02392578125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 71126932, + "step": 563 + }, + { + "epoch": 0.1446803052651831, + "grad_norm": 66.55777740478516, + "learning_rate": 5e-06, + "loss": 1.4283, + "num_input_tokens_seen": 71253280, + "step": 564 + }, + { + "epoch": 0.1446803052651831, + "loss": 1.2392961978912354, + "loss_ce": 0.0010148679139092565, + "loss_iou": 0.58984375, + "loss_num": 0.01202392578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 71253280, + "step": 564 + }, + { + "epoch": 0.14493683062912846, + "grad_norm": 46.23533630371094, + "learning_rate": 5e-06, + "loss": 1.6927, + "num_input_tokens_seen": 71379980, + "step": 565 + }, + { + "epoch": 0.14493683062912846, + "loss": 1.6584994792938232, + "loss_ce": 0.0012729023583233356, + "loss_iou": 0.73828125, + "loss_num": 0.035400390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 71379980, + "step": 565 + }, + { + "epoch": 0.1451933559930738, + "grad_norm": 37.38328170776367, + "learning_rate": 5e-06, + "loss": 1.661, + "num_input_tokens_seen": 71507300, + "step": 566 + }, + { + "epoch": 0.1451933559930738, + "loss": 1.6988778114318848, + "loss_ce": 0.000635545002296567, + "loss_iou": 0.7734375, + "loss_num": 0.030517578125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 71507300, + "step": 566 + }, + { + "epoch": 0.14544988135701917, + "grad_norm": 48.878726959228516, + "learning_rate": 5e-06, + "loss": 1.5643, + "num_input_tokens_seen": 71634524, + "step": 567 + }, + { + "epoch": 0.14544988135701917, + "loss": 1.3866894245147705, + "loss_ce": 0.0009473641403019428, + "loss_iou": 0.65234375, + "loss_num": 0.016357421875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 71634524, + "step": 567 + }, + { + "epoch": 0.14570640672096455, + "grad_norm": 53.29856491088867, + "learning_rate": 5e-06, + "loss": 1.7319, + "num_input_tokens_seen": 71761132, + "step": 568 + }, + { + "epoch": 0.14570640672096455, + "loss": 1.8051761388778687, + "loss_ce": 0.0014652200043201447, + "loss_iou": 0.81640625, + "loss_num": 0.03369140625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 71761132, + "step": 568 + }, + { + "epoch": 0.1459629320849099, + "grad_norm": 48.535682678222656, + "learning_rate": 5e-06, + "loss": 1.6325, + "num_input_tokens_seen": 71887352, + "step": 569 + }, + { + "epoch": 0.1459629320849099, + "loss": 1.6104873418807983, + "loss_ce": 0.0030655106529593468, + "loss_iou": 0.734375, + "loss_num": 0.0272216796875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 71887352, + "step": 569 + }, + { + "epoch": 0.14621945744885526, + "grad_norm": 60.103309631347656, + "learning_rate": 5e-06, + "loss": 1.7861, + "num_input_tokens_seen": 72013904, + "step": 570 + }, + { + "epoch": 0.14621945744885526, + "loss": 1.6273894309997559, + "loss_ce": 0.0033660721965134144, + "loss_iou": 0.74609375, + "loss_num": 0.02587890625, + "loss_xval": 1.625, + "num_input_tokens_seen": 72013904, + "step": 570 + }, + { + "epoch": 0.1464759828128006, + "grad_norm": 35.31315612792969, + "learning_rate": 5e-06, + "loss": 1.6818, + "num_input_tokens_seen": 72139984, + "step": 571 + }, + { + "epoch": 0.1464759828128006, + "loss": 1.8573236465454102, + "loss_ce": 0.0013665887527167797, + "loss_iou": 0.83984375, + "loss_num": 0.0361328125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 72139984, + "step": 571 + }, + { + "epoch": 0.14673250817674596, + "grad_norm": 39.63682556152344, + "learning_rate": 5e-06, + "loss": 1.4634, + "num_input_tokens_seen": 72266212, + "step": 572 + }, + { + "epoch": 0.14673250817674596, + "loss": 1.4371800422668457, + "loss_ce": 0.0026096974033862352, + "loss_iou": 0.66796875, + "loss_num": 0.0191650390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 72266212, + "step": 572 + }, + { + "epoch": 0.14698903354069134, + "grad_norm": 73.38729858398438, + "learning_rate": 5e-06, + "loss": 1.5782, + "num_input_tokens_seen": 72393136, + "step": 573 + }, + { + "epoch": 0.14698903354069134, + "loss": 1.584549069404602, + "loss_ce": 0.0025178072974085808, + "loss_iou": 0.7265625, + "loss_num": 0.02490234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 72393136, + "step": 573 + }, + { + "epoch": 0.1472455589046367, + "grad_norm": 43.090694427490234, + "learning_rate": 5e-06, + "loss": 1.5578, + "num_input_tokens_seen": 72518564, + "step": 574 + }, + { + "epoch": 0.1472455589046367, + "loss": 1.630611538887024, + "loss_ce": 0.002681904472410679, + "loss_iou": 0.7421875, + "loss_num": 0.02880859375, + "loss_xval": 1.625, + "num_input_tokens_seen": 72518564, + "step": 574 + }, + { + "epoch": 0.14750208426858205, + "grad_norm": 36.16836166381836, + "learning_rate": 5e-06, + "loss": 1.49, + "num_input_tokens_seen": 72645676, + "step": 575 + }, + { + "epoch": 0.14750208426858205, + "loss": 1.3360331058502197, + "loss_ce": 0.0005838978104293346, + "loss_iou": 0.62890625, + "loss_num": 0.01556396484375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 72645676, + "step": 575 + }, + { + "epoch": 0.1477586096325274, + "grad_norm": 70.74276733398438, + "learning_rate": 5e-06, + "loss": 1.5439, + "num_input_tokens_seen": 72771428, + "step": 576 + }, + { + "epoch": 0.1477586096325274, + "loss": 1.5601820945739746, + "loss_ce": 0.0025649545714259148, + "loss_iou": 0.71875, + "loss_num": 0.0245361328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 72771428, + "step": 576 + }, + { + "epoch": 0.14801513499647279, + "grad_norm": 40.81587219238281, + "learning_rate": 5e-06, + "loss": 1.7654, + "num_input_tokens_seen": 72897336, + "step": 577 + }, + { + "epoch": 0.14801513499647279, + "loss": 1.8659732341766357, + "loss_ce": 0.0017152922227978706, + "loss_iou": 0.83984375, + "loss_num": 0.03662109375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 72897336, + "step": 577 + }, + { + "epoch": 0.14827166036041814, + "grad_norm": 39.28877639770508, + "learning_rate": 5e-06, + "loss": 1.4362, + "num_input_tokens_seen": 73024072, + "step": 578 + }, + { + "epoch": 0.14827166036041814, + "loss": 1.3250255584716797, + "loss_ce": 0.0027599718887358904, + "loss_iou": 0.625, + "loss_num": 0.01385498046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 73024072, + "step": 578 + }, + { + "epoch": 0.1485281857243635, + "grad_norm": 45.285396575927734, + "learning_rate": 5e-06, + "loss": 1.5091, + "num_input_tokens_seen": 73149644, + "step": 579 + }, + { + "epoch": 0.1485281857243635, + "loss": 1.4105304479599, + "loss_ce": 0.0013507843250408769, + "loss_iou": 0.66015625, + "loss_num": 0.017333984375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 73149644, + "step": 579 + }, + { + "epoch": 0.14878471108830885, + "grad_norm": 57.239322662353516, + "learning_rate": 5e-06, + "loss": 1.4544, + "num_input_tokens_seen": 73276728, + "step": 580 + }, + { + "epoch": 0.14878471108830885, + "loss": 1.4242689609527588, + "loss_ce": 0.0023940117098391056, + "loss_iou": 0.65234375, + "loss_num": 0.0238037109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 73276728, + "step": 580 + }, + { + "epoch": 0.14904123645225423, + "grad_norm": 75.37710571289062, + "learning_rate": 5e-06, + "loss": 1.5413, + "num_input_tokens_seen": 73403492, + "step": 581 + }, + { + "epoch": 0.14904123645225423, + "loss": 1.4440460205078125, + "loss_ce": 0.000686710060108453, + "loss_iou": 0.65625, + "loss_num": 0.025634765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 73403492, + "step": 581 + }, + { + "epoch": 0.14929776181619958, + "grad_norm": 50.49420166015625, + "learning_rate": 5e-06, + "loss": 1.7171, + "num_input_tokens_seen": 73530384, + "step": 582 + }, + { + "epoch": 0.14929776181619958, + "loss": 1.6590261459350586, + "loss_ce": 0.001799560384824872, + "loss_iou": 0.7578125, + "loss_num": 0.0283203125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 73530384, + "step": 582 + }, + { + "epoch": 0.14955428718014493, + "grad_norm": 43.39142608642578, + "learning_rate": 5e-06, + "loss": 1.5826, + "num_input_tokens_seen": 73655548, + "step": 583 + }, + { + "epoch": 0.14955428718014493, + "loss": 1.8158423900604248, + "loss_ce": 0.0043189385905861855, + "loss_iou": 0.81640625, + "loss_num": 0.035400390625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 73655548, + "step": 583 + }, + { + "epoch": 0.1498108125440903, + "grad_norm": 49.67422866821289, + "learning_rate": 5e-06, + "loss": 1.7034, + "num_input_tokens_seen": 73780436, + "step": 584 + }, + { + "epoch": 0.1498108125440903, + "loss": 1.8468043804168701, + "loss_ce": 0.0020777375902980566, + "loss_iou": 0.828125, + "loss_num": 0.037841796875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 73780436, + "step": 584 + }, + { + "epoch": 0.15006733790803567, + "grad_norm": 38.22661209106445, + "learning_rate": 5e-06, + "loss": 1.4372, + "num_input_tokens_seen": 73906716, + "step": 585 + }, + { + "epoch": 0.15006733790803567, + "loss": 1.4204368591308594, + "loss_ce": 0.0034446953795850277, + "loss_iou": 0.65625, + "loss_num": 0.0203857421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 73906716, + "step": 585 + }, + { + "epoch": 0.15032386327198102, + "grad_norm": 82.67743682861328, + "learning_rate": 5e-06, + "loss": 1.5307, + "num_input_tokens_seen": 74033596, + "step": 586 + }, + { + "epoch": 0.15032386327198102, + "loss": 1.3638746738433838, + "loss_ce": 0.0035232193768024445, + "loss_iou": 0.62890625, + "loss_num": 0.0203857421875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 74033596, + "step": 586 + }, + { + "epoch": 0.15058038863592638, + "grad_norm": 42.65348815917969, + "learning_rate": 5e-06, + "loss": 1.6281, + "num_input_tokens_seen": 74159880, + "step": 587 + }, + { + "epoch": 0.15058038863592638, + "loss": 1.5148348808288574, + "loss_ce": 0.0011629253858700395, + "loss_iou": 0.69921875, + "loss_num": 0.0228271484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 74159880, + "step": 587 + }, + { + "epoch": 0.15083691399987173, + "grad_norm": 52.83303451538086, + "learning_rate": 5e-06, + "loss": 1.6874, + "num_input_tokens_seen": 74285212, + "step": 588 + }, + { + "epoch": 0.15083691399987173, + "loss": 1.6562573909759521, + "loss_ce": 0.0009839492850005627, + "loss_iou": 0.7578125, + "loss_num": 0.0279541015625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 74285212, + "step": 588 + }, + { + "epoch": 0.15109343936381708, + "grad_norm": 48.557437896728516, + "learning_rate": 5e-06, + "loss": 1.7268, + "num_input_tokens_seen": 74411764, + "step": 589 + }, + { + "epoch": 0.15109343936381708, + "loss": 1.6911873817443848, + "loss_ce": 0.0017342529026791453, + "loss_iou": 0.7734375, + "loss_num": 0.0291748046875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 74411764, + "step": 589 + }, + { + "epoch": 0.15134996472776246, + "grad_norm": 52.513004302978516, + "learning_rate": 5e-06, + "loss": 1.579, + "num_input_tokens_seen": 74537668, + "step": 590 + }, + { + "epoch": 0.15134996472776246, + "loss": 1.4191499948501587, + "loss_ce": 0.004110934678465128, + "loss_iou": 0.66015625, + "loss_num": 0.0189208984375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 74537668, + "step": 590 + }, + { + "epoch": 0.15160649009170782, + "grad_norm": 45.65567398071289, + "learning_rate": 5e-06, + "loss": 1.5378, + "num_input_tokens_seen": 74663560, + "step": 591 + }, + { + "epoch": 0.15160649009170782, + "loss": 1.4624228477478027, + "loss_ce": 0.0005087483441457152, + "loss_iou": 0.68359375, + "loss_num": 0.01904296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 74663560, + "step": 591 + }, + { + "epoch": 0.15186301545565317, + "grad_norm": 64.55572509765625, + "learning_rate": 5e-06, + "loss": 1.7781, + "num_input_tokens_seen": 74789548, + "step": 592 + }, + { + "epoch": 0.15186301545565317, + "loss": 1.8094186782836914, + "loss_ce": 0.003754502162337303, + "loss_iou": 0.82421875, + "loss_num": 0.031005859375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 74789548, + "step": 592 + }, + { + "epoch": 0.15211954081959853, + "grad_norm": 48.58171463012695, + "learning_rate": 5e-06, + "loss": 1.8721, + "num_input_tokens_seen": 74916696, + "step": 593 + }, + { + "epoch": 0.15211954081959853, + "loss": 1.7366645336151123, + "loss_ce": 0.0013128416612744331, + "loss_iou": 0.78515625, + "loss_num": 0.032470703125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 74916696, + "step": 593 + }, + { + "epoch": 0.1523760661835439, + "grad_norm": 51.23349380493164, + "learning_rate": 5e-06, + "loss": 1.5553, + "num_input_tokens_seen": 75043144, + "step": 594 + }, + { + "epoch": 0.1523760661835439, + "loss": 2.00990629196167, + "loss_ce": 0.004047081805765629, + "loss_iou": 0.88671875, + "loss_num": 0.04541015625, + "loss_xval": 2.0, + "num_input_tokens_seen": 75043144, + "step": 594 + }, + { + "epoch": 0.15263259154748926, + "grad_norm": 54.21349334716797, + "learning_rate": 5e-06, + "loss": 1.4874, + "num_input_tokens_seen": 75168820, + "step": 595 + }, + { + "epoch": 0.15263259154748926, + "loss": 1.444124698638916, + "loss_ce": 0.0017418676288798451, + "loss_iou": 0.671875, + "loss_num": 0.01953125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 75168820, + "step": 595 + }, + { + "epoch": 0.1528891169114346, + "grad_norm": 56.62431716918945, + "learning_rate": 5e-06, + "loss": 1.6738, + "num_input_tokens_seen": 75296192, + "step": 596 + }, + { + "epoch": 0.1528891169114346, + "loss": 1.876996397972107, + "loss_ce": 0.004926127847284079, + "loss_iou": 0.84375, + "loss_num": 0.036865234375, + "loss_xval": 1.875, + "num_input_tokens_seen": 75296192, + "step": 596 + }, + { + "epoch": 0.15314564227537997, + "grad_norm": 73.03638458251953, + "learning_rate": 5e-06, + "loss": 1.4456, + "num_input_tokens_seen": 75423044, + "step": 597 + }, + { + "epoch": 0.15314564227537997, + "loss": 1.5212998390197754, + "loss_ce": 0.0012802882120013237, + "loss_iou": 0.6875, + "loss_num": 0.0284423828125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 75423044, + "step": 597 + }, + { + "epoch": 0.15340216763932535, + "grad_norm": 54.039939880371094, + "learning_rate": 5e-06, + "loss": 1.6402, + "num_input_tokens_seen": 75550176, + "step": 598 + }, + { + "epoch": 0.15340216763932535, + "loss": 1.8275648355484009, + "loss_ce": 0.004322602413594723, + "loss_iou": 0.8046875, + "loss_num": 0.042724609375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 75550176, + "step": 598 + }, + { + "epoch": 0.1536586930032707, + "grad_norm": 38.54146194458008, + "learning_rate": 5e-06, + "loss": 1.4683, + "num_input_tokens_seen": 75677316, + "step": 599 + }, + { + "epoch": 0.1536586930032707, + "loss": 1.4439986944198608, + "loss_ce": 0.0021041773725301027, + "loss_iou": 0.66796875, + "loss_num": 0.02099609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 75677316, + "step": 599 + }, + { + "epoch": 0.15391521836721606, + "grad_norm": 50.49699401855469, + "learning_rate": 5e-06, + "loss": 1.4704, + "num_input_tokens_seen": 75803556, + "step": 600 + }, + { + "epoch": 0.15391521836721606, + "loss": 1.2889337539672852, + "loss_ce": 0.0023127233143895864, + "loss_iou": 0.59765625, + "loss_num": 0.01806640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 75803556, + "step": 600 + }, + { + "epoch": 0.1541717437311614, + "grad_norm": 38.798606872558594, + "learning_rate": 5e-06, + "loss": 1.5553, + "num_input_tokens_seen": 75928968, + "step": 601 + }, + { + "epoch": 0.1541717437311614, + "loss": 1.4038331508636475, + "loss_ce": 0.00441913353279233, + "loss_iou": 0.6484375, + "loss_num": 0.02001953125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 75928968, + "step": 601 + }, + { + "epoch": 0.1544282690951068, + "grad_norm": 56.033660888671875, + "learning_rate": 5e-06, + "loss": 1.4756, + "num_input_tokens_seen": 76056176, + "step": 602 + }, + { + "epoch": 0.1544282690951068, + "loss": 1.6018264293670654, + "loss_ce": 0.006123394705355167, + "loss_iou": 0.73828125, + "loss_num": 0.0238037109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 76056176, + "step": 602 + }, + { + "epoch": 0.15468479445905214, + "grad_norm": 62.73344039916992, + "learning_rate": 5e-06, + "loss": 1.677, + "num_input_tokens_seen": 76182672, + "step": 603 + }, + { + "epoch": 0.15468479445905214, + "loss": 1.8152211904525757, + "loss_ce": 0.0027212006971240044, + "loss_iou": 0.80859375, + "loss_num": 0.038818359375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 76182672, + "step": 603 + }, + { + "epoch": 0.1549413198229975, + "grad_norm": 49.5567626953125, + "learning_rate": 5e-06, + "loss": 1.5673, + "num_input_tokens_seen": 76309224, + "step": 604 + }, + { + "epoch": 0.1549413198229975, + "loss": 1.6534454822540283, + "loss_ce": 0.0020781990606337786, + "loss_iou": 0.75, + "loss_num": 0.030517578125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 76309224, + "step": 604 + }, + { + "epoch": 0.15519784518694285, + "grad_norm": 74.88670349121094, + "learning_rate": 5e-06, + "loss": 1.5139, + "num_input_tokens_seen": 76435308, + "step": 605 + }, + { + "epoch": 0.15519784518694285, + "loss": 1.4446709156036377, + "loss_ce": 0.005217882804572582, + "loss_iou": 0.6796875, + "loss_num": 0.0166015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 76435308, + "step": 605 + }, + { + "epoch": 0.15545437055088823, + "grad_norm": 49.866050720214844, + "learning_rate": 5e-06, + "loss": 1.799, + "num_input_tokens_seen": 76561616, + "step": 606 + }, + { + "epoch": 0.15545437055088823, + "loss": 1.8579350709915161, + "loss_ce": 0.0005131715442985296, + "loss_iou": 0.83203125, + "loss_num": 0.038330078125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 76561616, + "step": 606 + }, + { + "epoch": 0.15571089591483359, + "grad_norm": 39.4975471496582, + "learning_rate": 5e-06, + "loss": 1.4822, + "num_input_tokens_seen": 76687284, + "step": 607 + }, + { + "epoch": 0.15571089591483359, + "loss": 1.3781297206878662, + "loss_ce": 0.004594467580318451, + "loss_iou": 0.63671875, + "loss_num": 0.0208740234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 76687284, + "step": 607 + }, + { + "epoch": 0.15596742127877894, + "grad_norm": 91.91250610351562, + "learning_rate": 5e-06, + "loss": 1.5994, + "num_input_tokens_seen": 76813556, + "step": 608 + }, + { + "epoch": 0.15596742127877894, + "loss": 1.6230511665344238, + "loss_ce": 0.003910559229552746, + "loss_iou": 0.75390625, + "loss_num": 0.0218505859375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 76813556, + "step": 608 + }, + { + "epoch": 0.1562239466427243, + "grad_norm": 53.741783142089844, + "learning_rate": 5e-06, + "loss": 1.7303, + "num_input_tokens_seen": 76940380, + "step": 609 + }, + { + "epoch": 0.1562239466427243, + "loss": 1.7919665575027466, + "loss_ce": 0.00876346230506897, + "loss_iou": 0.79296875, + "loss_num": 0.0400390625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 76940380, + "step": 609 + }, + { + "epoch": 0.15648047200666965, + "grad_norm": 28.128437042236328, + "learning_rate": 5e-06, + "loss": 1.5153, + "num_input_tokens_seen": 77066396, + "step": 610 + }, + { + "epoch": 0.15648047200666965, + "loss": 1.6679878234863281, + "loss_ce": 0.000995634589344263, + "loss_iou": 0.7421875, + "loss_num": 0.03564453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 77066396, + "step": 610 + }, + { + "epoch": 0.15673699737061503, + "grad_norm": 38.44987106323242, + "learning_rate": 5e-06, + "loss": 1.4808, + "num_input_tokens_seen": 77192576, + "step": 611 + }, + { + "epoch": 0.15673699737061503, + "loss": 1.5539766550064087, + "loss_ce": 0.006125175394117832, + "loss_iou": 0.6953125, + "loss_num": 0.031982421875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 77192576, + "step": 611 + }, + { + "epoch": 0.15699352273456038, + "grad_norm": 42.712196350097656, + "learning_rate": 5e-06, + "loss": 1.5177, + "num_input_tokens_seen": 77318340, + "step": 612 + }, + { + "epoch": 0.15699352273456038, + "loss": 1.5805890560150146, + "loss_ce": 0.002464146353304386, + "loss_iou": 0.71484375, + "loss_num": 0.0299072265625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 77318340, + "step": 612 + }, + { + "epoch": 0.15725004809850573, + "grad_norm": 67.8608627319336, + "learning_rate": 5e-06, + "loss": 1.5112, + "num_input_tokens_seen": 77444112, + "step": 613 + }, + { + "epoch": 0.15725004809850573, + "loss": 1.3369100093841553, + "loss_ce": 0.002925680484622717, + "loss_iou": 0.6171875, + "loss_num": 0.0196533203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 77444112, + "step": 613 + }, + { + "epoch": 0.1575065734624511, + "grad_norm": 51.244388580322266, + "learning_rate": 5e-06, + "loss": 1.5678, + "num_input_tokens_seen": 77569616, + "step": 614 + }, + { + "epoch": 0.1575065734624511, + "loss": 1.5358669757843018, + "loss_ce": 0.004616935271769762, + "loss_iou": 0.7109375, + "loss_num": 0.0220947265625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 77569616, + "step": 614 + }, + { + "epoch": 0.15776309882639647, + "grad_norm": 39.469173431396484, + "learning_rate": 5e-06, + "loss": 1.683, + "num_input_tokens_seen": 77695044, + "step": 615 + }, + { + "epoch": 0.15776309882639647, + "loss": 1.7164641618728638, + "loss_ce": 0.008456309325993061, + "loss_iou": 0.7890625, + "loss_num": 0.025146484375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 77695044, + "step": 615 + }, + { + "epoch": 0.15801962419034182, + "grad_norm": 62.49048614501953, + "learning_rate": 5e-06, + "loss": 1.5193, + "num_input_tokens_seen": 77820756, + "step": 616 + }, + { + "epoch": 0.15801962419034182, + "loss": 1.7489218711853027, + "loss_ce": 0.0018515517003834248, + "loss_iou": 0.78515625, + "loss_num": 0.03564453125, + "loss_xval": 1.75, + "num_input_tokens_seen": 77820756, + "step": 616 + }, + { + "epoch": 0.15827614955428718, + "grad_norm": 95.73761749267578, + "learning_rate": 5e-06, + "loss": 1.5191, + "num_input_tokens_seen": 77946856, + "step": 617 + }, + { + "epoch": 0.15827614955428718, + "loss": 1.4469906091690063, + "loss_ce": 0.001678092172369361, + "loss_iou": 0.671875, + "loss_num": 0.02099609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 77946856, + "step": 617 + }, + { + "epoch": 0.15853267491823253, + "grad_norm": 46.59117126464844, + "learning_rate": 5e-06, + "loss": 1.7558, + "num_input_tokens_seen": 78072428, + "step": 618 + }, + { + "epoch": 0.15853267491823253, + "loss": 1.8148832321166992, + "loss_ce": 0.007265983149409294, + "loss_iou": 0.8046875, + "loss_num": 0.039306640625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 78072428, + "step": 618 + }, + { + "epoch": 0.1587892002821779, + "grad_norm": 39.142032623291016, + "learning_rate": 5e-06, + "loss": 1.5875, + "num_input_tokens_seen": 78199144, + "step": 619 + }, + { + "epoch": 0.1587892002821779, + "loss": 1.518519639968872, + "loss_ce": 0.0028945913072675467, + "loss_iou": 0.69140625, + "loss_num": 0.026123046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 78199144, + "step": 619 + }, + { + "epoch": 0.15904572564612326, + "grad_norm": 27.324047088623047, + "learning_rate": 5e-06, + "loss": 1.5821, + "num_input_tokens_seen": 78325808, + "step": 620 + }, + { + "epoch": 0.15904572564612326, + "loss": 1.4541864395141602, + "loss_ce": 0.0010614325292408466, + "loss_iou": 0.6796875, + "loss_num": 0.0191650390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 78325808, + "step": 620 + }, + { + "epoch": 0.15930225101006862, + "grad_norm": 68.94915771484375, + "learning_rate": 5e-06, + "loss": 1.452, + "num_input_tokens_seen": 78452628, + "step": 621 + }, + { + "epoch": 0.15930225101006862, + "loss": 1.3846306800842285, + "loss_ce": 0.000841553439386189, + "loss_iou": 0.640625, + "loss_num": 0.01953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 78452628, + "step": 621 + }, + { + "epoch": 0.15955877637401397, + "grad_norm": 49.75607681274414, + "learning_rate": 5e-06, + "loss": 1.5777, + "num_input_tokens_seen": 78579288, + "step": 622 + }, + { + "epoch": 0.15955877637401397, + "loss": 1.4593671560287476, + "loss_ce": 0.00038276921259239316, + "loss_iou": 0.671875, + "loss_num": 0.0233154296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 78579288, + "step": 622 + }, + { + "epoch": 0.15981530173795935, + "grad_norm": 37.10400390625, + "learning_rate": 5e-06, + "loss": 1.4073, + "num_input_tokens_seen": 78705280, + "step": 623 + }, + { + "epoch": 0.15981530173795935, + "loss": 1.455942988395691, + "loss_ce": 0.0028179753571748734, + "loss_iou": 0.66015625, + "loss_num": 0.0269775390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 78705280, + "step": 623 + }, + { + "epoch": 0.1600718271019047, + "grad_norm": 29.619606018066406, + "learning_rate": 5e-06, + "loss": 1.4822, + "num_input_tokens_seen": 78831620, + "step": 624 + }, + { + "epoch": 0.1600718271019047, + "loss": 1.600303292274475, + "loss_ce": 0.0006939612794667482, + "loss_iou": 0.71875, + "loss_num": 0.032958984375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 78831620, + "step": 624 + }, + { + "epoch": 0.16032835246585006, + "grad_norm": 60.412601470947266, + "learning_rate": 5e-06, + "loss": 1.2927, + "num_input_tokens_seen": 78958500, + "step": 625 + }, + { + "epoch": 0.16032835246585006, + "loss": 1.5193530321121216, + "loss_ce": 0.0017749086255207658, + "loss_iou": 0.71484375, + "loss_num": 0.0179443359375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 78958500, + "step": 625 + }, + { + "epoch": 0.1605848778297954, + "grad_norm": 49.4111328125, + "learning_rate": 5e-06, + "loss": 1.65, + "num_input_tokens_seen": 79085284, + "step": 626 + }, + { + "epoch": 0.1605848778297954, + "loss": 1.847962737083435, + "loss_ce": 0.0032361664343625307, + "loss_iou": 0.828125, + "loss_num": 0.03662109375, + "loss_xval": 1.84375, + "num_input_tokens_seen": 79085284, + "step": 626 + }, + { + "epoch": 0.16084140319374077, + "grad_norm": 62.8826789855957, + "learning_rate": 5e-06, + "loss": 1.4087, + "num_input_tokens_seen": 79211980, + "step": 627 + }, + { + "epoch": 0.16084140319374077, + "loss": 1.4072067737579346, + "loss_ce": 0.0024216885212808847, + "loss_iou": 0.6484375, + "loss_num": 0.022216796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 79211980, + "step": 627 + }, + { + "epoch": 0.16109792855768615, + "grad_norm": 48.82058334350586, + "learning_rate": 5e-06, + "loss": 1.6276, + "num_input_tokens_seen": 79338084, + "step": 628 + }, + { + "epoch": 0.16109792855768615, + "loss": 1.38386070728302, + "loss_ce": 0.004466169513761997, + "loss_iou": 0.6328125, + "loss_num": 0.02294921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 79338084, + "step": 628 + }, + { + "epoch": 0.1613544539216315, + "grad_norm": 27.47557258605957, + "learning_rate": 5e-06, + "loss": 1.4631, + "num_input_tokens_seen": 79465124, + "step": 629 + }, + { + "epoch": 0.1613544539216315, + "loss": 1.4801063537597656, + "loss_ce": 0.0015908535569906235, + "loss_iou": 0.67578125, + "loss_num": 0.0262451171875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 79465124, + "step": 629 + }, + { + "epoch": 0.16161097928557686, + "grad_norm": 44.35489273071289, + "learning_rate": 5e-06, + "loss": 1.4472, + "num_input_tokens_seen": 79591644, + "step": 630 + }, + { + "epoch": 0.16161097928557686, + "loss": 1.5442371368408203, + "loss_ce": 0.0012683806708082557, + "loss_iou": 0.703125, + "loss_num": 0.02783203125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 79591644, + "step": 630 + }, + { + "epoch": 0.1618675046495222, + "grad_norm": 95.3369369506836, + "learning_rate": 5e-06, + "loss": 1.6033, + "num_input_tokens_seen": 79717444, + "step": 631 + }, + { + "epoch": 0.1618675046495222, + "loss": 1.5621118545532227, + "loss_ce": 0.001564964884892106, + "loss_iou": 0.72265625, + "loss_num": 0.0225830078125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 79717444, + "step": 631 + }, + { + "epoch": 0.1621240300134676, + "grad_norm": 49.88316345214844, + "learning_rate": 5e-06, + "loss": 1.7543, + "num_input_tokens_seen": 79844172, + "step": 632 + }, + { + "epoch": 0.1621240300134676, + "loss": 1.7254849672317505, + "loss_ce": 0.0018520853482186794, + "loss_iou": 0.77734375, + "loss_num": 0.03369140625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 79844172, + "step": 632 + }, + { + "epoch": 0.16238055537741294, + "grad_norm": 57.45986557006836, + "learning_rate": 5e-06, + "loss": 1.4382, + "num_input_tokens_seen": 79970964, + "step": 633 + }, + { + "epoch": 0.16238055537741294, + "loss": 1.5227338075637817, + "loss_ce": 0.0012494265101850033, + "loss_iou": 0.69140625, + "loss_num": 0.0274658203125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 79970964, + "step": 633 + }, + { + "epoch": 0.1626370807413583, + "grad_norm": 83.04951477050781, + "learning_rate": 5e-06, + "loss": 1.6025, + "num_input_tokens_seen": 80096488, + "step": 634 + }, + { + "epoch": 0.1626370807413583, + "loss": 1.6235147714614868, + "loss_ce": 0.002420944394543767, + "loss_iou": 0.734375, + "loss_num": 0.030517578125, + "loss_xval": 1.625, + "num_input_tokens_seen": 80096488, + "step": 634 + }, + { + "epoch": 0.16289360610530365, + "grad_norm": 48.508968353271484, + "learning_rate": 5e-06, + "loss": 1.8204, + "num_input_tokens_seen": 80222752, + "step": 635 + }, + { + "epoch": 0.16289360610530365, + "loss": 1.86301851272583, + "loss_ce": 0.002666852902621031, + "loss_iou": 0.83984375, + "loss_num": 0.0361328125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 80222752, + "step": 635 + }, + { + "epoch": 0.16315013146924903, + "grad_norm": 42.72905349731445, + "learning_rate": 5e-06, + "loss": 1.4732, + "num_input_tokens_seen": 80350164, + "step": 636 + }, + { + "epoch": 0.16315013146924903, + "loss": 1.5267226696014404, + "loss_ce": 0.0032851214054971933, + "loss_iou": 0.69140625, + "loss_num": 0.027587890625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 80350164, + "step": 636 + }, + { + "epoch": 0.16340665683319439, + "grad_norm": 86.05872344970703, + "learning_rate": 5e-06, + "loss": 1.4522, + "num_input_tokens_seen": 80476984, + "step": 637 + }, + { + "epoch": 0.16340665683319439, + "loss": 1.314481496810913, + "loss_ce": 0.00295812520198524, + "loss_iou": 0.6171875, + "loss_num": 0.0145263671875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 80476984, + "step": 637 + }, + { + "epoch": 0.16366318219713974, + "grad_norm": 48.6816291809082, + "learning_rate": 5e-06, + "loss": 1.6099, + "num_input_tokens_seen": 80604020, + "step": 638 + }, + { + "epoch": 0.16366318219713974, + "loss": 1.511645793914795, + "loss_ce": 0.0028566729743033648, + "loss_iou": 0.70703125, + "loss_num": 0.0186767578125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 80604020, + "step": 638 + }, + { + "epoch": 0.1639197075610851, + "grad_norm": 44.24678421020508, + "learning_rate": 5e-06, + "loss": 1.518, + "num_input_tokens_seen": 80729944, + "step": 639 + }, + { + "epoch": 0.1639197075610851, + "loss": 1.5194647312164307, + "loss_ce": 0.0009101370815187693, + "loss_iou": 0.70703125, + "loss_num": 0.0203857421875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 80729944, + "step": 639 + }, + { + "epoch": 0.16417623292503047, + "grad_norm": 52.118858337402344, + "learning_rate": 5e-06, + "loss": 1.5125, + "num_input_tokens_seen": 80856280, + "step": 640 + }, + { + "epoch": 0.16417623292503047, + "loss": 1.400589942932129, + "loss_ce": 0.00312904198653996, + "loss_iou": 0.640625, + "loss_num": 0.02294921875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 80856280, + "step": 640 + }, + { + "epoch": 0.16443275828897583, + "grad_norm": 293.4383239746094, + "learning_rate": 5e-06, + "loss": 1.4327, + "num_input_tokens_seen": 80983024, + "step": 641 + }, + { + "epoch": 0.16443275828897583, + "loss": 1.3168811798095703, + "loss_ce": 0.0024281772784888744, + "loss_iou": 0.6171875, + "loss_num": 0.01611328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 80983024, + "step": 641 + }, + { + "epoch": 0.16468928365292118, + "grad_norm": 40.556331634521484, + "learning_rate": 5e-06, + "loss": 1.7595, + "num_input_tokens_seen": 81108952, + "step": 642 + }, + { + "epoch": 0.16468928365292118, + "loss": 1.9438207149505615, + "loss_ce": 0.003390980651602149, + "loss_iou": 0.87109375, + "loss_num": 0.03955078125, + "loss_xval": 1.9375, + "num_input_tokens_seen": 81108952, + "step": 642 + }, + { + "epoch": 0.16494580901686653, + "grad_norm": 71.15190124511719, + "learning_rate": 5e-06, + "loss": 1.4901, + "num_input_tokens_seen": 81235368, + "step": 643 + }, + { + "epoch": 0.16494580901686653, + "loss": 1.5523324012756348, + "loss_ce": 0.0044809505343437195, + "loss_iou": 0.69921875, + "loss_num": 0.0303955078125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 81235368, + "step": 643 + }, + { + "epoch": 0.16520233438081192, + "grad_norm": 71.93428039550781, + "learning_rate": 5e-06, + "loss": 1.5502, + "num_input_tokens_seen": 81362580, + "step": 644 + }, + { + "epoch": 0.16520233438081192, + "loss": 1.6351580619812012, + "loss_ce": 0.002345508197322488, + "loss_iou": 0.75, + "loss_num": 0.0263671875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 81362580, + "step": 644 + }, + { + "epoch": 0.16545885974475727, + "grad_norm": 53.46624755859375, + "learning_rate": 5e-06, + "loss": 1.7735, + "num_input_tokens_seen": 81489448, + "step": 645 + }, + { + "epoch": 0.16545885974475727, + "loss": 1.789193868637085, + "loss_ce": 0.0011079362593591213, + "loss_iou": 0.7890625, + "loss_num": 0.0419921875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 81489448, + "step": 645 + }, + { + "epoch": 0.16571538510870262, + "grad_norm": 85.6688003540039, + "learning_rate": 5e-06, + "loss": 1.4593, + "num_input_tokens_seen": 81616292, + "step": 646 + }, + { + "epoch": 0.16571538510870262, + "loss": 1.557888150215149, + "loss_ce": 0.00417720153927803, + "loss_iou": 0.703125, + "loss_num": 0.029296875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 81616292, + "step": 646 + }, + { + "epoch": 0.16597191047264798, + "grad_norm": 31.81472396850586, + "learning_rate": 5e-06, + "loss": 1.4939, + "num_input_tokens_seen": 81741648, + "step": 647 + }, + { + "epoch": 0.16597191047264798, + "loss": 1.41986083984375, + "loss_ce": 0.0023804549127817154, + "loss_iou": 0.6484375, + "loss_num": 0.0235595703125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 81741648, + "step": 647 + }, + { + "epoch": 0.16622843583659333, + "grad_norm": 53.631752014160156, + "learning_rate": 5e-06, + "loss": 1.6074, + "num_input_tokens_seen": 81868228, + "step": 648 + }, + { + "epoch": 0.16622843583659333, + "loss": 1.5203707218170166, + "loss_ce": 0.003769185394048691, + "loss_iou": 0.6953125, + "loss_num": 0.025146484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 81868228, + "step": 648 + }, + { + "epoch": 0.1664849612005387, + "grad_norm": 50.211021423339844, + "learning_rate": 5e-06, + "loss": 1.5642, + "num_input_tokens_seen": 81995368, + "step": 649 + }, + { + "epoch": 0.1664849612005387, + "loss": 1.7067978382110596, + "loss_ce": 0.0007431370904669166, + "loss_iou": 0.76953125, + "loss_num": 0.033447265625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 81995368, + "step": 649 + }, + { + "epoch": 0.16674148656448406, + "grad_norm": 82.01054382324219, + "learning_rate": 5e-06, + "loss": 1.5109, + "num_input_tokens_seen": 82120904, + "step": 650 + }, + { + "epoch": 0.16674148656448406, + "loss": 1.7389417886734009, + "loss_ce": 0.0035902070812880993, + "loss_iou": 0.796875, + "loss_num": 0.0281982421875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 82120904, + "step": 650 + }, + { + "epoch": 0.16699801192842942, + "grad_norm": 47.11616134643555, + "learning_rate": 5e-06, + "loss": 1.6673, + "num_input_tokens_seen": 82246992, + "step": 651 + }, + { + "epoch": 0.16699801192842942, + "loss": 1.9247736930847168, + "loss_ce": 0.001922201132401824, + "loss_iou": 0.86328125, + "loss_num": 0.039306640625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 82246992, + "step": 651 + }, + { + "epoch": 0.16725453729237477, + "grad_norm": 52.35890197753906, + "learning_rate": 5e-06, + "loss": 1.4383, + "num_input_tokens_seen": 82374624, + "step": 652 + }, + { + "epoch": 0.16725453729237477, + "loss": 1.5015100240707397, + "loss_ce": 0.001510057132691145, + "loss_iou": 0.6953125, + "loss_num": 0.021240234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 82374624, + "step": 652 + }, + { + "epoch": 0.16751106265632015, + "grad_norm": 85.18927764892578, + "learning_rate": 5e-06, + "loss": 1.5318, + "num_input_tokens_seen": 82501116, + "step": 653 + }, + { + "epoch": 0.16751106265632015, + "loss": 1.6697872877120972, + "loss_ce": 0.0032833926379680634, + "loss_iou": 0.75390625, + "loss_num": 0.032470703125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 82501116, + "step": 653 + }, + { + "epoch": 0.1677675880202655, + "grad_norm": 46.05815124511719, + "learning_rate": 5e-06, + "loss": 1.4221, + "num_input_tokens_seen": 82626200, + "step": 654 + }, + { + "epoch": 0.1677675880202655, + "loss": 1.2765986919403076, + "loss_ce": 0.001207967521622777, + "loss_iou": 0.58984375, + "loss_num": 0.0184326171875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 82626200, + "step": 654 + }, + { + "epoch": 0.16802411338421086, + "grad_norm": 41.75895690917969, + "learning_rate": 5e-06, + "loss": 1.5139, + "num_input_tokens_seen": 82751012, + "step": 655 + }, + { + "epoch": 0.16802411338421086, + "loss": 1.4482321739196777, + "loss_ce": 0.0009665294783189893, + "loss_iou": 0.67578125, + "loss_num": 0.0186767578125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 82751012, + "step": 655 + }, + { + "epoch": 0.1682806387481562, + "grad_norm": 40.624298095703125, + "learning_rate": 5e-06, + "loss": 1.424, + "num_input_tokens_seen": 82876864, + "step": 656 + }, + { + "epoch": 0.1682806387481562, + "loss": 1.5138459205627441, + "loss_ce": 0.0031036450527608395, + "loss_iou": 0.67578125, + "loss_num": 0.032470703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 82876864, + "step": 656 + }, + { + "epoch": 0.1685371641121016, + "grad_norm": 54.69596481323242, + "learning_rate": 5e-06, + "loss": 1.4201, + "num_input_tokens_seen": 83003076, + "step": 657 + }, + { + "epoch": 0.1685371641121016, + "loss": 1.4386528730392456, + "loss_ce": 0.0006645869580097497, + "loss_iou": 0.65625, + "loss_num": 0.024658203125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 83003076, + "step": 657 + }, + { + "epoch": 0.16879368947604695, + "grad_norm": 45.89189529418945, + "learning_rate": 5e-06, + "loss": 1.7339, + "num_input_tokens_seen": 83129220, + "step": 658 + }, + { + "epoch": 0.16879368947604695, + "loss": 1.5292792320251465, + "loss_ce": 0.003888575593009591, + "loss_iou": 0.69921875, + "loss_num": 0.025146484375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 83129220, + "step": 658 + }, + { + "epoch": 0.1690502148399923, + "grad_norm": 41.78361892700195, + "learning_rate": 5e-06, + "loss": 1.3603, + "num_input_tokens_seen": 83255556, + "step": 659 + }, + { + "epoch": 0.1690502148399923, + "loss": 1.4428997039794922, + "loss_ce": 0.0019817196298390627, + "loss_iou": 0.66796875, + "loss_num": 0.0216064453125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 83255556, + "step": 659 + }, + { + "epoch": 0.16930674020393766, + "grad_norm": 50.292850494384766, + "learning_rate": 5e-06, + "loss": 1.4751, + "num_input_tokens_seen": 83381916, + "step": 660 + }, + { + "epoch": 0.16930674020393766, + "loss": 1.495980978012085, + "loss_ce": 0.002816832857206464, + "loss_iou": 0.68359375, + "loss_num": 0.0245361328125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 83381916, + "step": 660 + }, + { + "epoch": 0.16956326556788304, + "grad_norm": 60.592010498046875, + "learning_rate": 5e-06, + "loss": 1.4469, + "num_input_tokens_seen": 83509156, + "step": 661 + }, + { + "epoch": 0.16956326556788304, + "loss": 1.6751995086669922, + "loss_ce": 0.002347934525460005, + "loss_iou": 0.7578125, + "loss_num": 0.031494140625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 83509156, + "step": 661 + }, + { + "epoch": 0.1698197909318284, + "grad_norm": 45.439449310302734, + "learning_rate": 5e-06, + "loss": 1.586, + "num_input_tokens_seen": 83635688, + "step": 662 + }, + { + "epoch": 0.1698197909318284, + "loss": 1.609999179840088, + "loss_ce": 0.0006241374649107456, + "loss_iou": 0.75, + "loss_num": 0.02099609375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 83635688, + "step": 662 + }, + { + "epoch": 0.17007631629577374, + "grad_norm": 34.147151947021484, + "learning_rate": 5e-06, + "loss": 1.4549, + "num_input_tokens_seen": 83761020, + "step": 663 + }, + { + "epoch": 0.17007631629577374, + "loss": 1.1593257188796997, + "loss_ce": 0.0025874499697238207, + "loss_iou": 0.546875, + "loss_num": 0.0130615234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 83761020, + "step": 663 + }, + { + "epoch": 0.1703328416597191, + "grad_norm": 45.43739318847656, + "learning_rate": 5e-06, + "loss": 1.3533, + "num_input_tokens_seen": 83887412, + "step": 664 + }, + { + "epoch": 0.1703328416597191, + "loss": 1.2115195989608765, + "loss_ce": 0.0005821146187372506, + "loss_iou": 0.5703125, + "loss_num": 0.01446533203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 83887412, + "step": 664 + }, + { + "epoch": 0.17058936702366445, + "grad_norm": 77.38323974609375, + "learning_rate": 5e-06, + "loss": 1.5332, + "num_input_tokens_seen": 84013928, + "step": 665 + }, + { + "epoch": 0.17058936702366445, + "loss": 1.4861634969711304, + "loss_ce": 0.0008119468111544847, + "loss_iou": 0.6875, + "loss_num": 0.0216064453125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 84013928, + "step": 665 + }, + { + "epoch": 0.17084589238760983, + "grad_norm": 56.8577880859375, + "learning_rate": 5e-06, + "loss": 1.5447, + "num_input_tokens_seen": 84141456, + "step": 666 + }, + { + "epoch": 0.17084589238760983, + "loss": 1.536839246749878, + "loss_ce": 0.002659496618434787, + "loss_iou": 0.703125, + "loss_num": 0.02587890625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 84141456, + "step": 666 + }, + { + "epoch": 0.17110241775155519, + "grad_norm": 41.002830505371094, + "learning_rate": 5e-06, + "loss": 1.5417, + "num_input_tokens_seen": 84266544, + "step": 667 + }, + { + "epoch": 0.17110241775155519, + "loss": 1.671589732170105, + "loss_ce": 0.0031326464377343655, + "loss_iou": 0.7578125, + "loss_num": 0.030517578125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 84266544, + "step": 667 + }, + { + "epoch": 0.17135894311550054, + "grad_norm": 49.825828552246094, + "learning_rate": 5e-06, + "loss": 1.3499, + "num_input_tokens_seen": 84393068, + "step": 668 + }, + { + "epoch": 0.17135894311550054, + "loss": 1.5565457344055176, + "loss_ce": 0.0018581650219857693, + "loss_iou": 0.7109375, + "loss_num": 0.0264892578125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 84393068, + "step": 668 + }, + { + "epoch": 0.1716154684794459, + "grad_norm": 43.85466766357422, + "learning_rate": 5e-06, + "loss": 1.3726, + "num_input_tokens_seen": 84519164, + "step": 669 + }, + { + "epoch": 0.1716154684794459, + "loss": 1.3671770095825195, + "loss_ce": 0.0014543866273015738, + "loss_iou": 0.63671875, + "loss_num": 0.0191650390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 84519164, + "step": 669 + }, + { + "epoch": 0.17187199384339127, + "grad_norm": 48.001033782958984, + "learning_rate": 5e-06, + "loss": 1.4977, + "num_input_tokens_seen": 84646400, + "step": 670 + }, + { + "epoch": 0.17187199384339127, + "loss": 1.515437126159668, + "loss_ce": 0.004206589423120022, + "loss_iou": 0.6875, + "loss_num": 0.02685546875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 84646400, + "step": 670 + }, + { + "epoch": 0.17212851920733663, + "grad_norm": 51.94670486450195, + "learning_rate": 5e-06, + "loss": 1.5912, + "num_input_tokens_seen": 84772392, + "step": 671 + }, + { + "epoch": 0.17212851920733663, + "loss": 1.3646857738494873, + "loss_ce": 0.0014045286225154996, + "loss_iou": 0.64453125, + "loss_num": 0.01434326171875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 84772392, + "step": 671 + }, + { + "epoch": 0.17238504457128198, + "grad_norm": 67.07181549072266, + "learning_rate": 5e-06, + "loss": 1.3986, + "num_input_tokens_seen": 84899036, + "step": 672 + }, + { + "epoch": 0.17238504457128198, + "loss": 1.4572046995162964, + "loss_ce": 0.003103126771748066, + "loss_iou": 0.6640625, + "loss_num": 0.0260009765625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 84899036, + "step": 672 + }, + { + "epoch": 0.17264156993522733, + "grad_norm": 49.09014129638672, + "learning_rate": 5e-06, + "loss": 1.4432, + "num_input_tokens_seen": 85025852, + "step": 673 + }, + { + "epoch": 0.17264156993522733, + "loss": 1.4587152004241943, + "loss_ce": 0.0007074117311276495, + "loss_iou": 0.66796875, + "loss_num": 0.0242919921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 85025852, + "step": 673 + }, + { + "epoch": 0.17289809529917272, + "grad_norm": 44.313934326171875, + "learning_rate": 5e-06, + "loss": 1.5034, + "num_input_tokens_seen": 85152016, + "step": 674 + }, + { + "epoch": 0.17289809529917272, + "loss": 1.471702218055725, + "loss_ce": 0.0029522436670958996, + "loss_iou": 0.671875, + "loss_num": 0.024169921875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 85152016, + "step": 674 + }, + { + "epoch": 0.17315462066311807, + "grad_norm": 46.13582229614258, + "learning_rate": 5e-06, + "loss": 1.4027, + "num_input_tokens_seen": 85278176, + "step": 675 + }, + { + "epoch": 0.17315462066311807, + "loss": 1.2820916175842285, + "loss_ce": 0.0027948308270424604, + "loss_iou": 0.58203125, + "loss_num": 0.0225830078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 85278176, + "step": 675 + }, + { + "epoch": 0.17341114602706342, + "grad_norm": 74.892333984375, + "learning_rate": 5e-06, + "loss": 1.4818, + "num_input_tokens_seen": 85403940, + "step": 676 + }, + { + "epoch": 0.17341114602706342, + "loss": 1.5265682935714722, + "loss_ce": 0.004107424523681402, + "loss_iou": 0.6875, + "loss_num": 0.0299072265625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 85403940, + "step": 676 + }, + { + "epoch": 0.17366767139100878, + "grad_norm": 48.4569206237793, + "learning_rate": 5e-06, + "loss": 1.7342, + "num_input_tokens_seen": 85530044, + "step": 677 + }, + { + "epoch": 0.17366767139100878, + "loss": 1.4271458387374878, + "loss_ce": 0.0023411789443343878, + "loss_iou": 0.65625, + "loss_num": 0.021240234375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 85530044, + "step": 677 + }, + { + "epoch": 0.17392419675495416, + "grad_norm": 52.35408020019531, + "learning_rate": 5e-06, + "loss": 1.4532, + "num_input_tokens_seen": 85656044, + "step": 678 + }, + { + "epoch": 0.17392419675495416, + "loss": 1.2278435230255127, + "loss_ce": 0.0012809957843273878, + "loss_iou": 0.5703125, + "loss_num": 0.017578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 85656044, + "step": 678 + }, + { + "epoch": 0.1741807221188995, + "grad_norm": 52.754554748535156, + "learning_rate": 5e-06, + "loss": 1.5358, + "num_input_tokens_seen": 85782504, + "step": 679 + }, + { + "epoch": 0.1741807221188995, + "loss": 1.597200632095337, + "loss_ce": 0.002474140143021941, + "loss_iou": 0.71484375, + "loss_num": 0.033203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 85782504, + "step": 679 + }, + { + "epoch": 0.17443724748284486, + "grad_norm": 50.909908294677734, + "learning_rate": 5e-06, + "loss": 1.419, + "num_input_tokens_seen": 85908784, + "step": 680 + }, + { + "epoch": 0.17443724748284486, + "loss": 1.1967666149139404, + "loss_ce": 0.001454145647585392, + "loss_iou": 0.55859375, + "loss_num": 0.0157470703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 85908784, + "step": 680 + }, + { + "epoch": 0.17469377284679022, + "grad_norm": 37.427616119384766, + "learning_rate": 5e-06, + "loss": 1.374, + "num_input_tokens_seen": 86034280, + "step": 681 + }, + { + "epoch": 0.17469377284679022, + "loss": 1.3947467803955078, + "loss_ce": 0.008028069511055946, + "loss_iou": 0.63671875, + "loss_num": 0.0230712890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 86034280, + "step": 681 + }, + { + "epoch": 0.1749502982107356, + "grad_norm": 56.57387924194336, + "learning_rate": 5e-06, + "loss": 1.5934, + "num_input_tokens_seen": 86161484, + "step": 682 + }, + { + "epoch": 0.1749502982107356, + "loss": 1.6412622928619385, + "loss_ce": 0.002590326825156808, + "loss_iou": 0.734375, + "loss_num": 0.0341796875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 86161484, + "step": 682 + }, + { + "epoch": 0.17520682357468095, + "grad_norm": 48.387054443359375, + "learning_rate": 5e-06, + "loss": 1.3803, + "num_input_tokens_seen": 86287476, + "step": 683 + }, + { + "epoch": 0.17520682357468095, + "loss": 1.4630337953567505, + "loss_ce": 0.0016079850029200315, + "loss_iou": 0.6640625, + "loss_num": 0.0264892578125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 86287476, + "step": 683 + }, + { + "epoch": 0.1754633489386263, + "grad_norm": 68.5193862915039, + "learning_rate": 5e-06, + "loss": 1.4366, + "num_input_tokens_seen": 86413448, + "step": 684 + }, + { + "epoch": 0.1754633489386263, + "loss": 1.517972707748413, + "loss_ce": 0.004300874192267656, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 86413448, + "step": 684 + }, + { + "epoch": 0.17571987430257166, + "grad_norm": 53.40635681152344, + "learning_rate": 5e-06, + "loss": 1.7955, + "num_input_tokens_seen": 86540748, + "step": 685 + }, + { + "epoch": 0.17571987430257166, + "loss": 1.738936185836792, + "loss_ce": 0.002608025912195444, + "loss_iou": 0.78125, + "loss_num": 0.033935546875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 86540748, + "step": 685 + }, + { + "epoch": 0.175976399666517, + "grad_norm": 44.375953674316406, + "learning_rate": 5e-06, + "loss": 1.4779, + "num_input_tokens_seen": 86667316, + "step": 686 + }, + { + "epoch": 0.175976399666517, + "loss": 1.3921006917953491, + "loss_ce": 0.0024522070307284594, + "loss_iou": 0.625, + "loss_num": 0.02880859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 86667316, + "step": 686 + }, + { + "epoch": 0.1762329250304624, + "grad_norm": 52.596275329589844, + "learning_rate": 5e-06, + "loss": 1.2845, + "num_input_tokens_seen": 86793888, + "step": 687 + }, + { + "epoch": 0.1762329250304624, + "loss": 1.2361373901367188, + "loss_ce": 0.003715515835210681, + "loss_iou": 0.58203125, + "loss_num": 0.01422119140625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 86793888, + "step": 687 + }, + { + "epoch": 0.17648945039440775, + "grad_norm": 53.17640686035156, + "learning_rate": 5e-06, + "loss": 1.4964, + "num_input_tokens_seen": 86920112, + "step": 688 + }, + { + "epoch": 0.17648945039440775, + "loss": 1.6507009267807007, + "loss_ce": 0.0022633527405560017, + "loss_iou": 0.75, + "loss_num": 0.02880859375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 86920112, + "step": 688 + }, + { + "epoch": 0.1767459757583531, + "grad_norm": 54.21208190917969, + "learning_rate": 5e-06, + "loss": 1.499, + "num_input_tokens_seen": 87046320, + "step": 689 + }, + { + "epoch": 0.1767459757583531, + "loss": 1.5389537811279297, + "loss_ce": 0.006727222353219986, + "loss_iou": 0.703125, + "loss_num": 0.025390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 87046320, + "step": 689 + }, + { + "epoch": 0.17700250112229846, + "grad_norm": 106.88327026367188, + "learning_rate": 5e-06, + "loss": 1.5475, + "num_input_tokens_seen": 87174048, + "step": 690 + }, + { + "epoch": 0.17700250112229846, + "loss": 1.5167627334594727, + "loss_ce": 0.003090939950197935, + "loss_iou": 0.6796875, + "loss_num": 0.03173828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 87174048, + "step": 690 + }, + { + "epoch": 0.17725902648624384, + "grad_norm": 37.57487869262695, + "learning_rate": 5e-06, + "loss": 1.7019, + "num_input_tokens_seen": 87299864, + "step": 691 + }, + { + "epoch": 0.17725902648624384, + "loss": 1.6991196870803833, + "loss_ce": 0.0008774226880632341, + "loss_iou": 0.78125, + "loss_num": 0.026611328125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 87299864, + "step": 691 + }, + { + "epoch": 0.1775155518501892, + "grad_norm": 35.29161834716797, + "learning_rate": 5e-06, + "loss": 1.4426, + "num_input_tokens_seen": 87425864, + "step": 692 + }, + { + "epoch": 0.1775155518501892, + "loss": 1.3353677988052368, + "loss_ce": 0.001383333932608366, + "loss_iou": 0.625, + "loss_num": 0.0169677734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 87425864, + "step": 692 + }, + { + "epoch": 0.17777207721413454, + "grad_norm": 43.37476348876953, + "learning_rate": 5e-06, + "loss": 1.4744, + "num_input_tokens_seen": 87552388, + "step": 693 + }, + { + "epoch": 0.17777207721413454, + "loss": 1.5766165256500244, + "loss_ce": 0.002397890668362379, + "loss_iou": 0.71484375, + "loss_num": 0.02880859375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 87552388, + "step": 693 + }, + { + "epoch": 0.1780286025780799, + "grad_norm": 57.102535247802734, + "learning_rate": 5e-06, + "loss": 1.4526, + "num_input_tokens_seen": 87678372, + "step": 694 + }, + { + "epoch": 0.1780286025780799, + "loss": 1.2360501289367676, + "loss_ce": 0.0006985421641729772, + "loss_iou": 0.578125, + "loss_num": 0.0152587890625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 87678372, + "step": 694 + }, + { + "epoch": 0.17828512794202528, + "grad_norm": 48.99152755737305, + "learning_rate": 5e-06, + "loss": 1.7008, + "num_input_tokens_seen": 87803972, + "step": 695 + }, + { + "epoch": 0.17828512794202528, + "loss": 1.7282692193984985, + "loss_ce": 0.002683269325643778, + "loss_iou": 0.78515625, + "loss_num": 0.031005859375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 87803972, + "step": 695 + }, + { + "epoch": 0.17854165330597063, + "grad_norm": 76.05008697509766, + "learning_rate": 5e-06, + "loss": 1.4, + "num_input_tokens_seen": 87929752, + "step": 696 + }, + { + "epoch": 0.17854165330597063, + "loss": 1.3848971128463745, + "loss_ce": 0.005502617917954922, + "loss_iou": 0.62109375, + "loss_num": 0.0269775390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 87929752, + "step": 696 + }, + { + "epoch": 0.17879817866991599, + "grad_norm": 111.44060516357422, + "learning_rate": 5e-06, + "loss": 1.6454, + "num_input_tokens_seen": 88055512, + "step": 697 + }, + { + "epoch": 0.17879817866991599, + "loss": 1.6854281425476074, + "loss_ce": 0.002810893813148141, + "loss_iou": 0.73828125, + "loss_num": 0.04150390625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 88055512, + "step": 697 + }, + { + "epoch": 0.17905470403386134, + "grad_norm": 48.01878356933594, + "learning_rate": 5e-06, + "loss": 1.501, + "num_input_tokens_seen": 88181984, + "step": 698 + }, + { + "epoch": 0.17905470403386134, + "loss": 1.3288600444793701, + "loss_ce": 0.0017115051159635186, + "loss_iou": 0.60546875, + "loss_num": 0.022705078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 88181984, + "step": 698 + }, + { + "epoch": 0.17931122939780672, + "grad_norm": 81.63150787353516, + "learning_rate": 5e-06, + "loss": 1.4163, + "num_input_tokens_seen": 88308080, + "step": 699 + }, + { + "epoch": 0.17931122939780672, + "loss": 1.3433727025985718, + "loss_ce": 0.002064086962491274, + "loss_iou": 0.61328125, + "loss_num": 0.022216796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 88308080, + "step": 699 + }, + { + "epoch": 0.17956775476175207, + "grad_norm": 51.78082275390625, + "learning_rate": 5e-06, + "loss": 1.5951, + "num_input_tokens_seen": 88434724, + "step": 700 + }, + { + "epoch": 0.17956775476175207, + "loss": 1.7339545488357544, + "loss_ce": 0.008368520066142082, + "loss_iou": 0.7734375, + "loss_num": 0.036376953125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 88434724, + "step": 700 + }, + { + "epoch": 0.17982428012569743, + "grad_norm": 61.713706970214844, + "learning_rate": 5e-06, + "loss": 1.4761, + "num_input_tokens_seen": 88559552, + "step": 701 + }, + { + "epoch": 0.17982428012569743, + "loss": 1.4185237884521484, + "loss_ce": 0.004461327102035284, + "loss_iou": 0.671875, + "loss_num": 0.013671875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 88559552, + "step": 701 + }, + { + "epoch": 0.18008080548964278, + "grad_norm": 53.97096252441406, + "learning_rate": 5e-06, + "loss": 1.6532, + "num_input_tokens_seen": 88686372, + "step": 702 + }, + { + "epoch": 0.18008080548964278, + "loss": 1.5229980945587158, + "loss_ce": 0.001513659954071045, + "loss_iou": 0.70703125, + "loss_num": 0.022216796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 88686372, + "step": 702 + }, + { + "epoch": 0.18033733085358813, + "grad_norm": 92.05032348632812, + "learning_rate": 5e-06, + "loss": 1.3969, + "num_input_tokens_seen": 88813596, + "step": 703 + }, + { + "epoch": 0.18033733085358813, + "loss": 1.4159787893295288, + "loss_ce": 0.0028928820975124836, + "loss_iou": 0.65234375, + "loss_num": 0.0211181640625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 88813596, + "step": 703 + }, + { + "epoch": 0.18059385621753352, + "grad_norm": 35.70454406738281, + "learning_rate": 5e-06, + "loss": 1.3612, + "num_input_tokens_seen": 88939532, + "step": 704 + }, + { + "epoch": 0.18059385621753352, + "loss": 1.3291815519332886, + "loss_ce": 0.004962760955095291, + "loss_iou": 0.62890625, + "loss_num": 0.012939453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 88939532, + "step": 704 + }, + { + "epoch": 0.18085038158147887, + "grad_norm": 57.268531799316406, + "learning_rate": 5e-06, + "loss": 1.5287, + "num_input_tokens_seen": 89066196, + "step": 705 + }, + { + "epoch": 0.18085038158147887, + "loss": 1.3902287483215332, + "loss_ce": 0.0005803777603432536, + "loss_iou": 0.65625, + "loss_num": 0.01507568359375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 89066196, + "step": 705 + }, + { + "epoch": 0.18110690694542422, + "grad_norm": 56.077842712402344, + "learning_rate": 5e-06, + "loss": 1.6653, + "num_input_tokens_seen": 89193192, + "step": 706 + }, + { + "epoch": 0.18110690694542422, + "loss": 1.7315073013305664, + "loss_ce": 0.0010385378263890743, + "loss_iou": 0.7890625, + "loss_num": 0.0306396484375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 89193192, + "step": 706 + }, + { + "epoch": 0.18136343230936958, + "grad_norm": 64.09819030761719, + "learning_rate": 5e-06, + "loss": 1.4835, + "num_input_tokens_seen": 89319032, + "step": 707 + }, + { + "epoch": 0.18136343230936958, + "loss": 1.4756860733032227, + "loss_ce": 0.004982961807399988, + "loss_iou": 0.66796875, + "loss_num": 0.0269775390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 89319032, + "step": 707 + }, + { + "epoch": 0.18161995767331496, + "grad_norm": 38.08344650268555, + "learning_rate": 5e-06, + "loss": 1.4401, + "num_input_tokens_seen": 89444948, + "step": 708 + }, + { + "epoch": 0.18161995767331496, + "loss": 1.4717217683792114, + "loss_ce": 0.0019951669964939356, + "loss_iou": 0.67578125, + "loss_num": 0.0240478515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 89444948, + "step": 708 + }, + { + "epoch": 0.1818764830372603, + "grad_norm": 58.17047882080078, + "learning_rate": 5e-06, + "loss": 1.4684, + "num_input_tokens_seen": 89570324, + "step": 709 + }, + { + "epoch": 0.1818764830372603, + "loss": 1.6193065643310547, + "loss_ce": 0.007001795340329409, + "loss_iou": 0.7109375, + "loss_num": 0.037109375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 89570324, + "step": 709 + }, + { + "epoch": 0.18213300840120566, + "grad_norm": 60.13876724243164, + "learning_rate": 5e-06, + "loss": 1.5736, + "num_input_tokens_seen": 89698004, + "step": 710 + }, + { + "epoch": 0.18213300840120566, + "loss": 1.5884690284729004, + "loss_ce": 0.005949506536126137, + "loss_iou": 0.73046875, + "loss_num": 0.02490234375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 89698004, + "step": 710 + }, + { + "epoch": 0.18238953376515102, + "grad_norm": 65.3223648071289, + "learning_rate": 5e-06, + "loss": 1.3573, + "num_input_tokens_seen": 89823896, + "step": 711 + }, + { + "epoch": 0.18238953376515102, + "loss": 1.3171595335006714, + "loss_ce": 0.0007532262243330479, + "loss_iou": 0.6171875, + "loss_num": 0.015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 89823896, + "step": 711 + }, + { + "epoch": 0.1826460591290964, + "grad_norm": 50.57582473754883, + "learning_rate": 5e-06, + "loss": 1.478, + "num_input_tokens_seen": 89949416, + "step": 712 + }, + { + "epoch": 0.1826460591290964, + "loss": 1.5792417526245117, + "loss_ce": 0.0030699511989951134, + "loss_iou": 0.72265625, + "loss_num": 0.0255126953125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 89949416, + "step": 712 + }, + { + "epoch": 0.18290258449304175, + "grad_norm": 51.11810302734375, + "learning_rate": 5e-06, + "loss": 1.4436, + "num_input_tokens_seen": 90075980, + "step": 713 + }, + { + "epoch": 0.18290258449304175, + "loss": 1.4309558868408203, + "loss_ce": 0.002244966570287943, + "loss_iou": 0.65625, + "loss_num": 0.02294921875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 90075980, + "step": 713 + }, + { + "epoch": 0.1831591098569871, + "grad_norm": 43.62511444091797, + "learning_rate": 5e-06, + "loss": 1.4995, + "num_input_tokens_seen": 90201756, + "step": 714 + }, + { + "epoch": 0.1831591098569871, + "loss": 1.4851667881011963, + "loss_ce": 0.0017683382611721754, + "loss_iou": 0.68359375, + "loss_num": 0.0233154296875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 90201756, + "step": 714 + }, + { + "epoch": 0.18341563522093246, + "grad_norm": 76.03456115722656, + "learning_rate": 5e-06, + "loss": 1.4729, + "num_input_tokens_seen": 90328752, + "step": 715 + }, + { + "epoch": 0.18341563522093246, + "loss": 1.546578049659729, + "loss_ce": 0.006050685420632362, + "loss_iou": 0.68359375, + "loss_num": 0.034423828125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 90328752, + "step": 715 + }, + { + "epoch": 0.18367216058487784, + "grad_norm": 50.04610061645508, + "learning_rate": 5e-06, + "loss": 1.5025, + "num_input_tokens_seen": 90455132, + "step": 716 + }, + { + "epoch": 0.18367216058487784, + "loss": 1.3907361030578613, + "loss_ce": 0.0020642559975385666, + "loss_iou": 0.64453125, + "loss_num": 0.0198974609375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 90455132, + "step": 716 + }, + { + "epoch": 0.1839286859488232, + "grad_norm": 55.3236198425293, + "learning_rate": 5e-06, + "loss": 1.4647, + "num_input_tokens_seen": 90581768, + "step": 717 + }, + { + "epoch": 0.1839286859488232, + "loss": 1.2487831115722656, + "loss_ce": 0.001224456587806344, + "loss_iou": 0.58984375, + "loss_num": 0.01348876953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 90581768, + "step": 717 + }, + { + "epoch": 0.18418521131276855, + "grad_norm": 49.5446662902832, + "learning_rate": 5e-06, + "loss": 1.6264, + "num_input_tokens_seen": 90707736, + "step": 718 + }, + { + "epoch": 0.18418521131276855, + "loss": 1.538797378540039, + "loss_ce": 0.002664659172296524, + "loss_iou": 0.7109375, + "loss_num": 0.022705078125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 90707736, + "step": 718 + }, + { + "epoch": 0.1844417366767139, + "grad_norm": 101.7543716430664, + "learning_rate": 5e-06, + "loss": 1.4319, + "num_input_tokens_seen": 90835340, + "step": 719 + }, + { + "epoch": 0.1844417366767139, + "loss": 1.3595223426818848, + "loss_ce": 0.0021004541777074337, + "loss_iou": 0.63671875, + "loss_num": 0.0166015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 90835340, + "step": 719 + }, + { + "epoch": 0.18469826204065928, + "grad_norm": 38.48265838623047, + "learning_rate": 5e-06, + "loss": 1.566, + "num_input_tokens_seen": 90961292, + "step": 720 + }, + { + "epoch": 0.18469826204065928, + "loss": 1.4886090755462646, + "loss_ce": 0.002280927961692214, + "loss_iou": 0.6953125, + "loss_num": 0.018798828125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 90961292, + "step": 720 + }, + { + "epoch": 0.18495478740460464, + "grad_norm": 40.244483947753906, + "learning_rate": 5e-06, + "loss": 1.3516, + "num_input_tokens_seen": 91087004, + "step": 721 + }, + { + "epoch": 0.18495478740460464, + "loss": 1.3487298488616943, + "loss_ce": 0.0015619590412825346, + "loss_iou": 0.6171875, + "loss_num": 0.0233154296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 91087004, + "step": 721 + }, + { + "epoch": 0.18521131276855, + "grad_norm": 35.15084457397461, + "learning_rate": 5e-06, + "loss": 1.4244, + "num_input_tokens_seen": 91214024, + "step": 722 + }, + { + "epoch": 0.18521131276855, + "loss": 1.5300304889678955, + "loss_ce": 0.0026867641136050224, + "loss_iou": 0.69921875, + "loss_num": 0.0262451171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 91214024, + "step": 722 + }, + { + "epoch": 0.18546783813249534, + "grad_norm": 79.00787353515625, + "learning_rate": 5e-06, + "loss": 1.6179, + "num_input_tokens_seen": 91340904, + "step": 723 + }, + { + "epoch": 0.18546783813249534, + "loss": 1.5487065315246582, + "loss_ce": 0.0008550257771275938, + "loss_iou": 0.72265625, + "loss_num": 0.0208740234375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 91340904, + "step": 723 + }, + { + "epoch": 0.1857243634964407, + "grad_norm": 41.29024124145508, + "learning_rate": 5e-06, + "loss": 1.6778, + "num_input_tokens_seen": 91466564, + "step": 724 + }, + { + "epoch": 0.1857243634964407, + "loss": 1.5871570110321045, + "loss_ce": 0.0036608753725886345, + "loss_iou": 0.7109375, + "loss_num": 0.03173828125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 91466564, + "step": 724 + }, + { + "epoch": 0.18598088886038608, + "grad_norm": 44.04233932495117, + "learning_rate": 5e-06, + "loss": 1.2749, + "num_input_tokens_seen": 91592404, + "step": 725 + }, + { + "epoch": 0.18598088886038608, + "loss": 1.5660603046417236, + "loss_ce": 0.0020954369101673365, + "loss_iou": 0.6875, + "loss_num": 0.03759765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 91592404, + "step": 725 + }, + { + "epoch": 0.18623741422433143, + "grad_norm": 54.60873031616211, + "learning_rate": 5e-06, + "loss": 1.3854, + "num_input_tokens_seen": 91718816, + "step": 726 + }, + { + "epoch": 0.18623741422433143, + "loss": 1.4901278018951416, + "loss_ce": 0.00282311555929482, + "loss_iou": 0.6640625, + "loss_num": 0.0308837890625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 91718816, + "step": 726 + }, + { + "epoch": 0.18649393958827679, + "grad_norm": 43.745304107666016, + "learning_rate": 5e-06, + "loss": 1.501, + "num_input_tokens_seen": 91845428, + "step": 727 + }, + { + "epoch": 0.18649393958827679, + "loss": 1.3656672239303589, + "loss_ce": 0.0009211193537339568, + "loss_iou": 0.62890625, + "loss_num": 0.0220947265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 91845428, + "step": 727 + }, + { + "epoch": 0.18675046495222214, + "grad_norm": 46.35885238647461, + "learning_rate": 5e-06, + "loss": 1.484, + "num_input_tokens_seen": 91971588, + "step": 728 + }, + { + "epoch": 0.18675046495222214, + "loss": 1.3003261089324951, + "loss_ce": 0.001009780098684132, + "loss_iou": 0.61328125, + "loss_num": 0.0145263671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 91971588, + "step": 728 + }, + { + "epoch": 0.18700699031616752, + "grad_norm": 56.08134460449219, + "learning_rate": 5e-06, + "loss": 1.4602, + "num_input_tokens_seen": 92098052, + "step": 729 + }, + { + "epoch": 0.18700699031616752, + "loss": 1.4000930786132812, + "loss_ce": 0.0021439180709421635, + "loss_iou": 0.66015625, + "loss_num": 0.01611328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 92098052, + "step": 729 + }, + { + "epoch": 0.18726351568011287, + "grad_norm": 95.84259033203125, + "learning_rate": 5e-06, + "loss": 1.4713, + "num_input_tokens_seen": 92224264, + "step": 730 + }, + { + "epoch": 0.18726351568011287, + "loss": 1.5204403400421143, + "loss_ce": 0.0018856195965781808, + "loss_iou": 0.6875, + "loss_num": 0.0284423828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 92224264, + "step": 730 + }, + { + "epoch": 0.18752004104405823, + "grad_norm": 43.581787109375, + "learning_rate": 5e-06, + "loss": 1.6635, + "num_input_tokens_seen": 92350128, + "step": 731 + }, + { + "epoch": 0.18752004104405823, + "loss": 1.5579040050506592, + "loss_ce": 0.00028678763192147017, + "loss_iou": 0.7109375, + "loss_num": 0.026611328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 92350128, + "step": 731 + }, + { + "epoch": 0.18777656640800358, + "grad_norm": 45.12299346923828, + "learning_rate": 5e-06, + "loss": 1.4914, + "num_input_tokens_seen": 92476876, + "step": 732 + }, + { + "epoch": 0.18777656640800358, + "loss": 1.491039514541626, + "loss_ce": 0.002758322050794959, + "loss_iou": 0.65234375, + "loss_num": 0.03662109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 92476876, + "step": 732 + }, + { + "epoch": 0.18803309177194896, + "grad_norm": 54.27924728393555, + "learning_rate": 5e-06, + "loss": 1.4341, + "num_input_tokens_seen": 92604404, + "step": 733 + }, + { + "epoch": 0.18803309177194896, + "loss": 1.5657432079315186, + "loss_ce": 0.003243240062147379, + "loss_iou": 0.6875, + "loss_num": 0.03759765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 92604404, + "step": 733 + }, + { + "epoch": 0.18828961713589432, + "grad_norm": 59.329132080078125, + "learning_rate": 5e-06, + "loss": 1.517, + "num_input_tokens_seen": 92730800, + "step": 734 + }, + { + "epoch": 0.18828961713589432, + "loss": 1.7183482646942139, + "loss_ce": 0.003504466963931918, + "loss_iou": 0.78125, + "loss_num": 0.03076171875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 92730800, + "step": 734 + }, + { + "epoch": 0.18854614249983967, + "grad_norm": 72.88380432128906, + "learning_rate": 5e-06, + "loss": 1.4632, + "num_input_tokens_seen": 92856428, + "step": 735 + }, + { + "epoch": 0.18854614249983967, + "loss": 1.7092386484146118, + "loss_ce": 0.00904332846403122, + "loss_iou": 0.7578125, + "loss_num": 0.03759765625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 92856428, + "step": 735 + }, + { + "epoch": 0.18880266786378502, + "grad_norm": 41.17212677001953, + "learning_rate": 5e-06, + "loss": 1.4264, + "num_input_tokens_seen": 92981156, + "step": 736 + }, + { + "epoch": 0.18880266786378502, + "loss": 1.3443195819854736, + "loss_ce": 0.0015460492577403784, + "loss_iou": 0.625, + "loss_num": 0.018310546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 92981156, + "step": 736 + }, + { + "epoch": 0.1890591932277304, + "grad_norm": 53.11419677734375, + "learning_rate": 5e-06, + "loss": 1.3451, + "num_input_tokens_seen": 93106920, + "step": 737 + }, + { + "epoch": 0.1890591932277304, + "loss": 1.2838952541351318, + "loss_ce": 0.0016686737071722746, + "loss_iou": 0.59375, + "loss_num": 0.0194091796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 93106920, + "step": 737 + }, + { + "epoch": 0.18931571859167576, + "grad_norm": 49.766700744628906, + "learning_rate": 5e-06, + "loss": 1.6252, + "num_input_tokens_seen": 93232984, + "step": 738 + }, + { + "epoch": 0.18931571859167576, + "loss": 1.608275055885315, + "loss_ce": 0.0018296812195330858, + "loss_iou": 0.7421875, + "loss_num": 0.0238037109375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 93232984, + "step": 738 + }, + { + "epoch": 0.1895722439556211, + "grad_norm": 85.74491882324219, + "learning_rate": 5e-06, + "loss": 1.4723, + "num_input_tokens_seen": 93359004, + "step": 739 + }, + { + "epoch": 0.1895722439556211, + "loss": 1.192650318145752, + "loss_ce": 0.0012441009748727083, + "loss_iou": 0.56640625, + "loss_num": 0.01123046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 93359004, + "step": 739 + }, + { + "epoch": 0.18982876931956646, + "grad_norm": 43.55925750732422, + "learning_rate": 5e-06, + "loss": 1.3945, + "num_input_tokens_seen": 93485496, + "step": 740 + }, + { + "epoch": 0.18982876931956646, + "loss": 1.568324327468872, + "loss_ce": 0.0038712667301297188, + "loss_iou": 0.7109375, + "loss_num": 0.0284423828125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 93485496, + "step": 740 + }, + { + "epoch": 0.19008529468351182, + "grad_norm": 71.7542953491211, + "learning_rate": 5e-06, + "loss": 1.4572, + "num_input_tokens_seen": 93611524, + "step": 741 + }, + { + "epoch": 0.19008529468351182, + "loss": 1.6233015060424805, + "loss_ce": 0.002207789570093155, + "loss_iou": 0.734375, + "loss_num": 0.029541015625, + "loss_xval": 1.625, + "num_input_tokens_seen": 93611524, + "step": 741 + }, + { + "epoch": 0.1903418200474572, + "grad_norm": 56.21603012084961, + "learning_rate": 5e-06, + "loss": 1.6034, + "num_input_tokens_seen": 93738492, + "step": 742 + }, + { + "epoch": 0.1903418200474572, + "loss": 1.608897089958191, + "loss_ce": 0.0014752072747796774, + "loss_iou": 0.734375, + "loss_num": 0.0283203125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 93738492, + "step": 742 + }, + { + "epoch": 0.19059834541140255, + "grad_norm": 66.59623718261719, + "learning_rate": 5e-06, + "loss": 1.4841, + "num_input_tokens_seen": 93864340, + "step": 743 + }, + { + "epoch": 0.19059834541140255, + "loss": 1.5329947471618652, + "loss_ce": 0.0007681584684178233, + "loss_iou": 0.71484375, + "loss_num": 0.0213623046875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 93864340, + "step": 743 + }, + { + "epoch": 0.1908548707753479, + "grad_norm": 59.25039291381836, + "learning_rate": 5e-06, + "loss": 1.4918, + "num_input_tokens_seen": 93990016, + "step": 744 + }, + { + "epoch": 0.1908548707753479, + "loss": 1.404555082321167, + "loss_ce": 0.007094152271747589, + "loss_iou": 0.63671875, + "loss_num": 0.0245361328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 93990016, + "step": 744 + }, + { + "epoch": 0.19111139613929326, + "grad_norm": 70.17887878417969, + "learning_rate": 5e-06, + "loss": 1.371, + "num_input_tokens_seen": 94116340, + "step": 745 + }, + { + "epoch": 0.19111139613929326, + "loss": 1.1463377475738525, + "loss_ce": 0.005224438849836588, + "loss_iou": 0.53125, + "loss_num": 0.01519775390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 94116340, + "step": 745 + }, + { + "epoch": 0.19136792150323864, + "grad_norm": 45.97539520263672, + "learning_rate": 5e-06, + "loss": 1.4958, + "num_input_tokens_seen": 94242608, + "step": 746 + }, + { + "epoch": 0.19136792150323864, + "loss": 1.4609613418579102, + "loss_ce": 0.005394945852458477, + "loss_iou": 0.6484375, + "loss_num": 0.031494140625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 94242608, + "step": 746 + }, + { + "epoch": 0.191624446867184, + "grad_norm": 37.92036056518555, + "learning_rate": 5e-06, + "loss": 1.387, + "num_input_tokens_seen": 94368212, + "step": 747 + }, + { + "epoch": 0.191624446867184, + "loss": 1.3088114261627197, + "loss_ce": 0.003635610220953822, + "loss_iou": 0.59765625, + "loss_num": 0.022216796875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 94368212, + "step": 747 + }, + { + "epoch": 0.19188097223112935, + "grad_norm": 30.749116897583008, + "learning_rate": 5e-06, + "loss": 1.3136, + "num_input_tokens_seen": 94492160, + "step": 748 + }, + { + "epoch": 0.19188097223112935, + "loss": 1.2855565547943115, + "loss_ce": 0.0008885765564627945, + "loss_iou": 0.59765625, + "loss_num": 0.01708984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 94492160, + "step": 748 + }, + { + "epoch": 0.1921374975950747, + "grad_norm": 48.46091079711914, + "learning_rate": 5e-06, + "loss": 1.4013, + "num_input_tokens_seen": 94618756, + "step": 749 + }, + { + "epoch": 0.1921374975950747, + "loss": 1.5843840837478638, + "loss_ce": 0.002352833980694413, + "loss_iou": 0.71484375, + "loss_num": 0.0302734375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 94618756, + "step": 749 + }, + { + "epoch": 0.19239402295902008, + "grad_norm": 52.85329818725586, + "learning_rate": 5e-06, + "loss": 1.364, + "num_input_tokens_seen": 94745468, + "step": 750 + }, + { + "epoch": 0.19239402295902008, + "eval_icons_CIoU": -0.01894897874444723, + "eval_icons_GIoU": -0.04759606532752514, + "eval_icons_IoU": 0.1573554091155529, + "eval_icons_MAE_all": 0.05506601929664612, + "eval_icons_MAE_h": 0.07492277398705482, + "eval_icons_MAE_w": 0.06497159227728844, + "eval_icons_MAE_x_boxes": 0.05891689844429493, + "eval_icons_MAE_y_boxes": 0.06718555651605129, + "eval_icons_NUM_probability": 0.9998254776000977, + "eval_icons_inside_bbox": 0.3385416716337204, + "eval_icons_loss": 2.3671042919158936, + "eval_icons_loss_ce": 0.0007935907924547791, + "eval_icons_loss_iou": 1.0693359375, + "eval_icons_loss_num": 0.05898284912109375, + "eval_icons_loss_xval": 2.43359375, + "eval_icons_runtime": 40.2373, + "eval_icons_samples_per_second": 1.243, + "eval_icons_steps_per_second": 0.05, + "num_input_tokens_seen": 94745468, + "step": 750 + }, + { + "epoch": 0.19239402295902008, + "eval_screenspot_CIoU": 0.058582218984762825, + "eval_screenspot_GIoU": 0.04530087734262148, + "eval_screenspot_IoU": 0.22695686419804892, + "eval_screenspot_MAE_all": 0.08712503562370937, + "eval_screenspot_MAE_h": 0.06914001454909642, + "eval_screenspot_MAE_w": 0.14226938784122467, + "eval_screenspot_MAE_x_boxes": 0.10903208206097285, + "eval_screenspot_MAE_y_boxes": 0.058699255188306175, + "eval_screenspot_NUM_probability": 0.9997839331626892, + "eval_screenspot_inside_bbox": 0.609166661898295, + "eval_screenspot_loss": 2.388688087463379, + "eval_screenspot_loss_ce": 0.005509096353004376, + "eval_screenspot_loss_iou": 0.9768880208333334, + "eval_screenspot_loss_num": 0.09430440266927083, + "eval_screenspot_loss_xval": 2.4264322916666665, + "eval_screenspot_runtime": 67.0868, + "eval_screenspot_samples_per_second": 1.327, + "eval_screenspot_steps_per_second": 0.045, + "num_input_tokens_seen": 94745468, + "step": 750 + }, + { + "epoch": 0.19239402295902008, + "loss": 2.4279370307922363, + "loss_ce": 0.00508531928062439, + "loss_iou": 0.984375, + "loss_num": 0.0908203125, + "loss_xval": 2.421875, + "num_input_tokens_seen": 94745468, + "step": 750 + }, + { + "epoch": 0.19265054832296544, + "grad_norm": 89.9964599609375, + "learning_rate": 5e-06, + "loss": 1.4015, + "num_input_tokens_seen": 94872620, + "step": 751 + }, + { + "epoch": 0.19265054832296544, + "loss": 1.2995052337646484, + "loss_ce": 0.0026302128098905087, + "loss_iou": 0.609375, + "loss_num": 0.0159912109375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 94872620, + "step": 751 + }, + { + "epoch": 0.1929070736869108, + "grad_norm": 44.04330825805664, + "learning_rate": 5e-06, + "loss": 1.5271, + "num_input_tokens_seen": 94998292, + "step": 752 + }, + { + "epoch": 0.1929070736869108, + "loss": 1.4665632247924805, + "loss_ce": 0.0007428829558193684, + "loss_iou": 0.6796875, + "loss_num": 0.021728515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 94998292, + "step": 752 + }, + { + "epoch": 0.19316359905085614, + "grad_norm": 46.42055892944336, + "learning_rate": 5e-06, + "loss": 1.429, + "num_input_tokens_seen": 95125332, + "step": 753 + }, + { + "epoch": 0.19316359905085614, + "loss": 1.265074372291565, + "loss_ce": 0.0009142071940004826, + "loss_iou": 0.58984375, + "loss_num": 0.017333984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 95125332, + "step": 753 + }, + { + "epoch": 0.19342012441480153, + "grad_norm": 92.1563491821289, + "learning_rate": 5e-06, + "loss": 1.3619, + "num_input_tokens_seen": 95252376, + "step": 754 + }, + { + "epoch": 0.19342012441480153, + "loss": 1.5032415390014648, + "loss_ce": 0.0012883726740255952, + "loss_iou": 0.6796875, + "loss_num": 0.028564453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 95252376, + "step": 754 + }, + { + "epoch": 0.19367664977874688, + "grad_norm": 46.85188674926758, + "learning_rate": 5e-06, + "loss": 1.7522, + "num_input_tokens_seen": 95379464, + "step": 755 + }, + { + "epoch": 0.19367664977874688, + "loss": 1.876871109008789, + "loss_ce": 0.0018711804877966642, + "loss_iou": 0.83984375, + "loss_num": 0.038818359375, + "loss_xval": 1.875, + "num_input_tokens_seen": 95379464, + "step": 755 + }, + { + "epoch": 0.19393317514269223, + "grad_norm": 55.023311614990234, + "learning_rate": 5e-06, + "loss": 1.4824, + "num_input_tokens_seen": 95505724, + "step": 756 + }, + { + "epoch": 0.19393317514269223, + "loss": 1.5398664474487305, + "loss_ce": 0.0012922082096338272, + "loss_iou": 0.68359375, + "loss_num": 0.0341796875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 95505724, + "step": 756 + }, + { + "epoch": 0.19418970050663759, + "grad_norm": 84.13701629638672, + "learning_rate": 5e-06, + "loss": 1.3595, + "num_input_tokens_seen": 95632768, + "step": 757 + }, + { + "epoch": 0.19418970050663759, + "loss": 1.4067058563232422, + "loss_ce": 0.0024089363869279623, + "loss_iou": 0.65625, + "loss_num": 0.0174560546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 95632768, + "step": 757 + }, + { + "epoch": 0.19444622587058297, + "grad_norm": 45.137237548828125, + "learning_rate": 5e-06, + "loss": 1.6751, + "num_input_tokens_seen": 95760508, + "step": 758 + }, + { + "epoch": 0.19444622587058297, + "loss": 1.7807172536849976, + "loss_ce": 0.0033734641037881374, + "loss_iou": 0.8046875, + "loss_num": 0.03369140625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 95760508, + "step": 758 + }, + { + "epoch": 0.19470275123452832, + "grad_norm": 40.1661491394043, + "learning_rate": 5e-06, + "loss": 1.4898, + "num_input_tokens_seen": 95886204, + "step": 759 + }, + { + "epoch": 0.19470275123452832, + "loss": 1.3750102519989014, + "loss_ce": 0.004893088713288307, + "loss_iou": 0.62109375, + "loss_num": 0.0250244140625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 95886204, + "step": 759 + }, + { + "epoch": 0.19495927659847367, + "grad_norm": 79.29988861083984, + "learning_rate": 5e-06, + "loss": 1.3263, + "num_input_tokens_seen": 96013096, + "step": 760 + }, + { + "epoch": 0.19495927659847367, + "loss": 1.4730665683746338, + "loss_ce": 0.006269759498536587, + "loss_iou": 0.65625, + "loss_num": 0.0308837890625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 96013096, + "step": 760 + }, + { + "epoch": 0.19521580196241903, + "grad_norm": 52.226383209228516, + "learning_rate": 5e-06, + "loss": 1.6003, + "num_input_tokens_seen": 96139400, + "step": 761 + }, + { + "epoch": 0.19521580196241903, + "loss": 2.0208628177642822, + "loss_ce": 0.005237806122750044, + "loss_iou": 0.86328125, + "loss_num": 0.057861328125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 96139400, + "step": 761 + }, + { + "epoch": 0.19547232732636438, + "grad_norm": 36.015525817871094, + "learning_rate": 5e-06, + "loss": 1.4566, + "num_input_tokens_seen": 96265368, + "step": 762 + }, + { + "epoch": 0.19547232732636438, + "loss": 1.4844902753829956, + "loss_ce": 0.0010917759500443935, + "loss_iou": 0.67578125, + "loss_num": 0.0260009765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 96265368, + "step": 762 + }, + { + "epoch": 0.19572885269030976, + "grad_norm": 43.686038970947266, + "learning_rate": 5e-06, + "loss": 1.2942, + "num_input_tokens_seen": 96391796, + "step": 763 + }, + { + "epoch": 0.19572885269030976, + "loss": 1.3235526084899902, + "loss_ce": 0.0022636253852397203, + "loss_iou": 0.6015625, + "loss_num": 0.022705078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 96391796, + "step": 763 + }, + { + "epoch": 0.19598537805425512, + "grad_norm": 63.98824691772461, + "learning_rate": 5e-06, + "loss": 1.3309, + "num_input_tokens_seen": 96519024, + "step": 764 + }, + { + "epoch": 0.19598537805425512, + "loss": 1.3297908306121826, + "loss_ce": 0.0036188391968607903, + "loss_iou": 0.61328125, + "loss_num": 0.019287109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 96519024, + "step": 764 + }, + { + "epoch": 0.19624190341820047, + "grad_norm": 46.816593170166016, + "learning_rate": 5e-06, + "loss": 1.5543, + "num_input_tokens_seen": 96644892, + "step": 765 + }, + { + "epoch": 0.19624190341820047, + "loss": 1.521022915840149, + "loss_ce": 0.0024682474322617054, + "loss_iou": 0.6953125, + "loss_num": 0.025146484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 96644892, + "step": 765 + }, + { + "epoch": 0.19649842878214582, + "grad_norm": 53.84040451049805, + "learning_rate": 5e-06, + "loss": 1.2711, + "num_input_tokens_seen": 96770360, + "step": 766 + }, + { + "epoch": 0.19649842878214582, + "loss": 1.33516526222229, + "loss_ce": 0.0006925397319719195, + "loss_iou": 0.62890625, + "loss_num": 0.0157470703125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 96770360, + "step": 766 + }, + { + "epoch": 0.1967549541460912, + "grad_norm": 48.041412353515625, + "learning_rate": 5e-06, + "loss": 1.423, + "num_input_tokens_seen": 96896116, + "step": 767 + }, + { + "epoch": 0.1967549541460912, + "loss": 1.3917702436447144, + "loss_ce": 0.000656980206258595, + "loss_iou": 0.62890625, + "loss_num": 0.02734375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 96896116, + "step": 767 + }, + { + "epoch": 0.19701147951003656, + "grad_norm": 39.182594299316406, + "learning_rate": 5e-06, + "loss": 1.4535, + "num_input_tokens_seen": 97021760, + "step": 768 + }, + { + "epoch": 0.19701147951003656, + "loss": 1.2645025253295898, + "loss_ce": 0.0013189890887588263, + "loss_iou": 0.578125, + "loss_num": 0.02099609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 97021760, + "step": 768 + }, + { + "epoch": 0.1972680048739819, + "grad_norm": 53.010398864746094, + "learning_rate": 5e-06, + "loss": 1.4903, + "num_input_tokens_seen": 97146952, + "step": 769 + }, + { + "epoch": 0.1972680048739819, + "loss": 1.525557041168213, + "loss_ce": 0.002119513927027583, + "loss_iou": 0.6953125, + "loss_num": 0.0260009765625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 97146952, + "step": 769 + }, + { + "epoch": 0.19752453023792726, + "grad_norm": 81.08814239501953, + "learning_rate": 5e-06, + "loss": 1.5406, + "num_input_tokens_seen": 97273620, + "step": 770 + }, + { + "epoch": 0.19752453023792726, + "loss": 1.5113434791564941, + "loss_ce": 0.0015779165551066399, + "loss_iou": 0.71484375, + "loss_num": 0.0157470703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 97273620, + "step": 770 + }, + { + "epoch": 0.19778105560187265, + "grad_norm": 44.36695098876953, + "learning_rate": 5e-06, + "loss": 1.7818, + "num_input_tokens_seen": 97400476, + "step": 771 + }, + { + "epoch": 0.19778105560187265, + "loss": 1.807408332824707, + "loss_ce": 0.0017443099059164524, + "loss_iou": 0.8125, + "loss_num": 0.03564453125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 97400476, + "step": 771 + }, + { + "epoch": 0.198037580965818, + "grad_norm": 43.34067916870117, + "learning_rate": 5e-06, + "loss": 1.4146, + "num_input_tokens_seen": 97526952, + "step": 772 + }, + { + "epoch": 0.198037580965818, + "loss": 1.3824810981750488, + "loss_ce": 0.003086468204855919, + "loss_iou": 0.6328125, + "loss_num": 0.02197265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 97526952, + "step": 772 + }, + { + "epoch": 0.19829410632976335, + "grad_norm": 94.08848571777344, + "learning_rate": 5e-06, + "loss": 1.4381, + "num_input_tokens_seen": 97652484, + "step": 773 + }, + { + "epoch": 0.19829410632976335, + "loss": 1.4073779582977295, + "loss_ce": 0.0021044581662863493, + "loss_iou": 0.66015625, + "loss_num": 0.016357421875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 97652484, + "step": 773 + }, + { + "epoch": 0.1985506316937087, + "grad_norm": 48.792503356933594, + "learning_rate": 5e-06, + "loss": 1.5903, + "num_input_tokens_seen": 97779496, + "step": 774 + }, + { + "epoch": 0.1985506316937087, + "loss": 1.539163589477539, + "loss_ce": 0.0010775867849588394, + "loss_iou": 0.70703125, + "loss_num": 0.024169921875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 97779496, + "step": 774 + }, + { + "epoch": 0.1988071570576541, + "grad_norm": 32.22834014892578, + "learning_rate": 5e-06, + "loss": 1.3709, + "num_input_tokens_seen": 97904904, + "step": 775 + }, + { + "epoch": 0.1988071570576541, + "loss": 1.3850696086883545, + "loss_ce": 0.003722058143466711, + "loss_iou": 0.63671875, + "loss_num": 0.0211181640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 97904904, + "step": 775 + }, + { + "epoch": 0.19906368242159944, + "grad_norm": 41.414955139160156, + "learning_rate": 5e-06, + "loss": 1.2273, + "num_input_tokens_seen": 98031372, + "step": 776 + }, + { + "epoch": 0.19906368242159944, + "loss": 1.1168365478515625, + "loss_ce": 0.0016021563205868006, + "loss_iou": 0.515625, + "loss_num": 0.016357421875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 98031372, + "step": 776 + }, + { + "epoch": 0.1993202077855448, + "grad_norm": 76.48211669921875, + "learning_rate": 5e-06, + "loss": 1.3155, + "num_input_tokens_seen": 98158076, + "step": 777 + }, + { + "epoch": 0.1993202077855448, + "loss": 1.1445887088775635, + "loss_ce": 0.0005457630031742156, + "loss_iou": 0.546875, + "loss_num": 0.01031494140625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 98158076, + "step": 777 + }, + { + "epoch": 0.19957673314949015, + "grad_norm": 52.87351608276367, + "learning_rate": 5e-06, + "loss": 1.5774, + "num_input_tokens_seen": 98285056, + "step": 778 + }, + { + "epoch": 0.19957673314949015, + "loss": 1.4616940021514893, + "loss_ce": 0.0017330326372757554, + "loss_iou": 0.68359375, + "loss_num": 0.01806640625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 98285056, + "step": 778 + }, + { + "epoch": 0.1998332585134355, + "grad_norm": 48.8697509765625, + "learning_rate": 5e-06, + "loss": 1.3163, + "num_input_tokens_seen": 98411888, + "step": 779 + }, + { + "epoch": 0.1998332585134355, + "loss": 1.3153234720230103, + "loss_ce": 0.0008703308994881809, + "loss_iou": 0.609375, + "loss_num": 0.0198974609375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 98411888, + "step": 779 + }, + { + "epoch": 0.20008978387738088, + "grad_norm": 58.962825775146484, + "learning_rate": 5e-06, + "loss": 1.4027, + "num_input_tokens_seen": 98539188, + "step": 780 + }, + { + "epoch": 0.20008978387738088, + "loss": 1.5080323219299316, + "loss_ce": 0.0011963420547544956, + "loss_iou": 0.66796875, + "loss_num": 0.0341796875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 98539188, + "step": 780 + }, + { + "epoch": 0.20034630924132624, + "grad_norm": 78.9335708618164, + "learning_rate": 5e-06, + "loss": 1.4963, + "num_input_tokens_seen": 98664888, + "step": 781 + }, + { + "epoch": 0.20034630924132624, + "loss": 1.776686191558838, + "loss_ce": 0.003248599823564291, + "loss_iou": 0.7890625, + "loss_num": 0.039306640625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 98664888, + "step": 781 + }, + { + "epoch": 0.2006028346052716, + "grad_norm": 50.15766143798828, + "learning_rate": 5e-06, + "loss": 1.5371, + "num_input_tokens_seen": 98791076, + "step": 782 + }, + { + "epoch": 0.2006028346052716, + "loss": 1.482041358947754, + "loss_ce": 0.00157262256834656, + "loss_iou": 0.671875, + "loss_num": 0.0262451171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 98791076, + "step": 782 + }, + { + "epoch": 0.20085935996921694, + "grad_norm": 28.655424118041992, + "learning_rate": 5e-06, + "loss": 1.2313, + "num_input_tokens_seen": 98918000, + "step": 783 + }, + { + "epoch": 0.20085935996921694, + "loss": 1.322141170501709, + "loss_ce": 0.0008520561968907714, + "loss_iou": 0.6171875, + "loss_num": 0.01806640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 98918000, + "step": 783 + }, + { + "epoch": 0.20111588533316233, + "grad_norm": 64.15157318115234, + "learning_rate": 5e-06, + "loss": 1.25, + "num_input_tokens_seen": 99045596, + "step": 784 + }, + { + "epoch": 0.20111588533316233, + "loss": 1.271141529083252, + "loss_ce": 0.002586791757494211, + "loss_iou": 0.5859375, + "loss_num": 0.019287109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 99045596, + "step": 784 + }, + { + "epoch": 0.20137241069710768, + "grad_norm": 37.62996292114258, + "learning_rate": 5e-06, + "loss": 1.4693, + "num_input_tokens_seen": 99170992, + "step": 785 + }, + { + "epoch": 0.20137241069710768, + "loss": 1.5090820789337158, + "loss_ce": 0.001757932361215353, + "loss_iou": 0.69140625, + "loss_num": 0.0244140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 99170992, + "step": 785 + }, + { + "epoch": 0.20162893606105303, + "grad_norm": 50.869712829589844, + "learning_rate": 5e-06, + "loss": 1.3314, + "num_input_tokens_seen": 99297100, + "step": 786 + }, + { + "epoch": 0.20162893606105303, + "loss": 1.1391561031341553, + "loss_ce": 0.0019490821287035942, + "loss_iou": 0.52734375, + "loss_num": 0.016845703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 99297100, + "step": 786 + }, + { + "epoch": 0.20188546142499839, + "grad_norm": 40.34794998168945, + "learning_rate": 5e-06, + "loss": 1.4675, + "num_input_tokens_seen": 99423248, + "step": 787 + }, + { + "epoch": 0.20188546142499839, + "loss": 1.4730722904205322, + "loss_ce": 0.0009043117752298713, + "loss_iou": 0.6796875, + "loss_num": 0.0225830078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 99423248, + "step": 787 + }, + { + "epoch": 0.20214198678894377, + "grad_norm": 65.97105407714844, + "learning_rate": 5e-06, + "loss": 1.3843, + "num_input_tokens_seen": 99548900, + "step": 788 + }, + { + "epoch": 0.20214198678894377, + "loss": 1.373723030090332, + "loss_ce": 0.0016527818515896797, + "loss_iou": 0.6328125, + "loss_num": 0.0216064453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 99548900, + "step": 788 + }, + { + "epoch": 0.20239851215288912, + "grad_norm": 52.80813980102539, + "learning_rate": 5e-06, + "loss": 1.4535, + "num_input_tokens_seen": 99676004, + "step": 789 + }, + { + "epoch": 0.20239851215288912, + "loss": 1.4932620525360107, + "loss_ce": 0.0015628508990630507, + "loss_iou": 0.671875, + "loss_num": 0.02978515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 99676004, + "step": 789 + }, + { + "epoch": 0.20265503751683447, + "grad_norm": 32.37436294555664, + "learning_rate": 5e-06, + "loss": 1.398, + "num_input_tokens_seen": 99802560, + "step": 790 + }, + { + "epoch": 0.20265503751683447, + "loss": 1.1959346532821655, + "loss_ce": 0.001110466313548386, + "loss_iou": 0.5546875, + "loss_num": 0.0167236328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 99802560, + "step": 790 + }, + { + "epoch": 0.20291156288077983, + "grad_norm": 44.16609191894531, + "learning_rate": 5e-06, + "loss": 1.4235, + "num_input_tokens_seen": 99928444, + "step": 791 + }, + { + "epoch": 0.20291156288077983, + "loss": 1.4066507816314697, + "loss_ce": 0.0062601035460829735, + "loss_iou": 0.640625, + "loss_num": 0.02392578125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 99928444, + "step": 791 + }, + { + "epoch": 0.2031680882447252, + "grad_norm": 60.706478118896484, + "learning_rate": 5e-06, + "loss": 1.4484, + "num_input_tokens_seen": 100055836, + "step": 792 + }, + { + "epoch": 0.2031680882447252, + "loss": 1.4433939456939697, + "loss_ce": 0.002964270766824484, + "loss_iou": 0.65234375, + "loss_num": 0.0269775390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 100055836, + "step": 792 + }, + { + "epoch": 0.20342461360867056, + "grad_norm": 55.016624450683594, + "learning_rate": 5e-06, + "loss": 1.3917, + "num_input_tokens_seen": 100181760, + "step": 793 + }, + { + "epoch": 0.20342461360867056, + "loss": 1.3301042318344116, + "loss_ce": 0.001002660719677806, + "loss_iou": 0.62109375, + "loss_num": 0.0174560546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 100181760, + "step": 793 + }, + { + "epoch": 0.20368113897261592, + "grad_norm": 85.83236694335938, + "learning_rate": 5e-06, + "loss": 1.4286, + "num_input_tokens_seen": 100308316, + "step": 794 + }, + { + "epoch": 0.20368113897261592, + "loss": 1.4247946739196777, + "loss_ce": 0.0019430036190897226, + "loss_iou": 0.671875, + "loss_num": 0.014892578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 100308316, + "step": 794 + }, + { + "epoch": 0.20393766433656127, + "grad_norm": 54.04437255859375, + "learning_rate": 5e-06, + "loss": 1.5507, + "num_input_tokens_seen": 100434196, + "step": 795 + }, + { + "epoch": 0.20393766433656127, + "loss": 1.4950535297393799, + "loss_ce": 0.0018894305685535073, + "loss_iou": 0.69921875, + "loss_num": 0.01953125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 100434196, + "step": 795 + }, + { + "epoch": 0.20419418970050665, + "grad_norm": 36.24423599243164, + "learning_rate": 5e-06, + "loss": 1.5537, + "num_input_tokens_seen": 100560204, + "step": 796 + }, + { + "epoch": 0.20419418970050665, + "loss": 1.568851351737976, + "loss_ce": 0.0004919501952826977, + "loss_iou": 0.70703125, + "loss_num": 0.03076171875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 100560204, + "step": 796 + }, + { + "epoch": 0.204450715064452, + "grad_norm": 45.03144836425781, + "learning_rate": 5e-06, + "loss": 1.4457, + "num_input_tokens_seen": 100686512, + "step": 797 + }, + { + "epoch": 0.204450715064452, + "loss": 1.4703400135040283, + "loss_ce": 0.0015899080317467451, + "loss_iou": 0.68359375, + "loss_num": 0.0198974609375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 100686512, + "step": 797 + }, + { + "epoch": 0.20470724042839736, + "grad_norm": 82.81558990478516, + "learning_rate": 5e-06, + "loss": 1.2971, + "num_input_tokens_seen": 100812672, + "step": 798 + }, + { + "epoch": 0.20470724042839736, + "loss": 1.2421239614486694, + "loss_ce": 0.0018895948305726051, + "loss_iou": 0.5625, + "loss_num": 0.0230712890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 100812672, + "step": 798 + }, + { + "epoch": 0.2049637657923427, + "grad_norm": 45.349609375, + "learning_rate": 5e-06, + "loss": 1.7063, + "num_input_tokens_seen": 100939496, + "step": 799 + }, + { + "epoch": 0.2049637657923427, + "loss": 1.6473942995071411, + "loss_ce": 0.0018865675665438175, + "loss_iou": 0.75, + "loss_num": 0.029541015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 100939496, + "step": 799 + }, + { + "epoch": 0.20522029115628806, + "grad_norm": 51.783843994140625, + "learning_rate": 5e-06, + "loss": 1.354, + "num_input_tokens_seen": 101067232, + "step": 800 + }, + { + "epoch": 0.20522029115628806, + "loss": 1.234392523765564, + "loss_ce": 0.004900284577161074, + "loss_iou": 0.56640625, + "loss_num": 0.0185546875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 101067232, + "step": 800 + }, + { + "epoch": 0.20547681652023345, + "grad_norm": 91.6460189819336, + "learning_rate": 5e-06, + "loss": 1.5017, + "num_input_tokens_seen": 101192436, + "step": 801 + }, + { + "epoch": 0.20547681652023345, + "loss": 1.45248544216156, + "loss_ce": 0.0022901983465999365, + "loss_iou": 0.6640625, + "loss_num": 0.0238037109375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 101192436, + "step": 801 + }, + { + "epoch": 0.2057333418841788, + "grad_norm": 62.732173919677734, + "learning_rate": 5e-06, + "loss": 1.4953, + "num_input_tokens_seen": 101320300, + "step": 802 + }, + { + "epoch": 0.2057333418841788, + "loss": 1.6269625425338745, + "loss_ce": 0.0009860070422291756, + "loss_iou": 0.74609375, + "loss_num": 0.0263671875, + "loss_xval": 1.625, + "num_input_tokens_seen": 101320300, + "step": 802 + }, + { + "epoch": 0.20598986724812415, + "grad_norm": 55.252281188964844, + "learning_rate": 5e-06, + "loss": 1.2914, + "num_input_tokens_seen": 101445868, + "step": 803 + }, + { + "epoch": 0.20598986724812415, + "loss": 1.183439016342163, + "loss_ce": 0.001310082501731813, + "loss_iou": 0.546875, + "loss_num": 0.0181884765625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 101445868, + "step": 803 + }, + { + "epoch": 0.2062463926120695, + "grad_norm": 33.75856018066406, + "learning_rate": 5e-06, + "loss": 1.4096, + "num_input_tokens_seen": 101571532, + "step": 804 + }, + { + "epoch": 0.2062463926120695, + "loss": 1.4584627151489258, + "loss_ce": 0.006314342841506004, + "loss_iou": 0.625, + "loss_num": 0.039794921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 101571532, + "step": 804 + }, + { + "epoch": 0.2065029179760149, + "grad_norm": 30.69576072692871, + "learning_rate": 5e-06, + "loss": 1.3549, + "num_input_tokens_seen": 101697644, + "step": 805 + }, + { + "epoch": 0.2065029179760149, + "loss": 1.3334208726882935, + "loss_ce": 0.0013896661112084985, + "loss_iou": 0.6171875, + "loss_num": 0.019775390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 101697644, + "step": 805 + }, + { + "epoch": 0.20675944333996024, + "grad_norm": 60.3752555847168, + "learning_rate": 5e-06, + "loss": 1.3265, + "num_input_tokens_seen": 101823464, + "step": 806 + }, + { + "epoch": 0.20675944333996024, + "loss": 1.4075208902359009, + "loss_ce": 0.002735760062932968, + "loss_iou": 0.6328125, + "loss_num": 0.0272216796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 101823464, + "step": 806 + }, + { + "epoch": 0.2070159687039056, + "grad_norm": 53.74232864379883, + "learning_rate": 5e-06, + "loss": 1.5127, + "num_input_tokens_seen": 101950832, + "step": 807 + }, + { + "epoch": 0.2070159687039056, + "loss": 1.6723754405975342, + "loss_ce": 0.0024535488337278366, + "loss_iou": 0.765625, + "loss_num": 0.028076171875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 101950832, + "step": 807 + }, + { + "epoch": 0.20727249406785095, + "grad_norm": 45.628204345703125, + "learning_rate": 5e-06, + "loss": 1.2721, + "num_input_tokens_seen": 102077556, + "step": 808 + }, + { + "epoch": 0.20727249406785095, + "loss": 1.4370296001434326, + "loss_ce": 0.0009944618213921785, + "loss_iou": 0.671875, + "loss_num": 0.0186767578125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 102077556, + "step": 808 + }, + { + "epoch": 0.20752901943179633, + "grad_norm": 48.517242431640625, + "learning_rate": 5e-06, + "loss": 1.462, + "num_input_tokens_seen": 102203288, + "step": 809 + }, + { + "epoch": 0.20752901943179633, + "loss": 1.6231465339660645, + "loss_ce": 0.005959098692983389, + "loss_iou": 0.7109375, + "loss_num": 0.03857421875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 102203288, + "step": 809 + }, + { + "epoch": 0.20778554479574168, + "grad_norm": 55.30953598022461, + "learning_rate": 5e-06, + "loss": 1.3801, + "num_input_tokens_seen": 102329584, + "step": 810 + }, + { + "epoch": 0.20778554479574168, + "loss": 1.3222111463546753, + "loss_ce": 0.001898646936751902, + "loss_iou": 0.609375, + "loss_num": 0.020751953125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 102329584, + "step": 810 + }, + { + "epoch": 0.20804207015968704, + "grad_norm": 54.080875396728516, + "learning_rate": 5e-06, + "loss": 1.5275, + "num_input_tokens_seen": 102456596, + "step": 811 + }, + { + "epoch": 0.20804207015968704, + "loss": 1.4465858936309814, + "loss_ce": 0.0012734452029690146, + "loss_iou": 0.671875, + "loss_num": 0.0201416015625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 102456596, + "step": 811 + }, + { + "epoch": 0.2082985955236324, + "grad_norm": 62.66007995605469, + "learning_rate": 5e-06, + "loss": 1.4349, + "num_input_tokens_seen": 102582100, + "step": 812 + }, + { + "epoch": 0.2082985955236324, + "loss": 1.3630447387695312, + "loss_ce": 0.0046462505124509335, + "loss_iou": 0.62890625, + "loss_num": 0.020751953125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 102582100, + "step": 812 + }, + { + "epoch": 0.20855512088757777, + "grad_norm": 47.617431640625, + "learning_rate": 5e-06, + "loss": 1.2773, + "num_input_tokens_seen": 102708012, + "step": 813 + }, + { + "epoch": 0.20855512088757777, + "loss": 1.2044544219970703, + "loss_ce": 0.0018176923040300608, + "loss_iou": 0.55859375, + "loss_num": 0.0172119140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 102708012, + "step": 813 + }, + { + "epoch": 0.20881164625152313, + "grad_norm": 46.293365478515625, + "learning_rate": 5e-06, + "loss": 1.3346, + "num_input_tokens_seen": 102835288, + "step": 814 + }, + { + "epoch": 0.20881164625152313, + "loss": 1.2680859565734863, + "loss_ce": 0.0005077685927972198, + "loss_iou": 0.59375, + "loss_num": 0.0164794921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 102835288, + "step": 814 + }, + { + "epoch": 0.20906817161546848, + "grad_norm": 53.65690231323242, + "learning_rate": 5e-06, + "loss": 1.3652, + "num_input_tokens_seen": 102962128, + "step": 815 + }, + { + "epoch": 0.20906817161546848, + "loss": 1.369244933128357, + "loss_ce": 0.004010563716292381, + "loss_iou": 0.6328125, + "loss_num": 0.0205078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 102962128, + "step": 815 + }, + { + "epoch": 0.20932469697941383, + "grad_norm": 100.80943298339844, + "learning_rate": 5e-06, + "loss": 1.2632, + "num_input_tokens_seen": 103089540, + "step": 816 + }, + { + "epoch": 0.20932469697941383, + "loss": 1.3470107316970825, + "loss_ce": 0.002284254413098097, + "loss_iou": 0.62109375, + "loss_num": 0.0205078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 103089540, + "step": 816 + }, + { + "epoch": 0.20958122234335919, + "grad_norm": 51.63835525512695, + "learning_rate": 5e-06, + "loss": 1.4528, + "num_input_tokens_seen": 103216072, + "step": 817 + }, + { + "epoch": 0.20958122234335919, + "loss": 1.3604388236999512, + "loss_ce": 0.002040391555055976, + "loss_iou": 0.62890625, + "loss_num": 0.0205078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 103216072, + "step": 817 + }, + { + "epoch": 0.20983774770730457, + "grad_norm": 60.27116394042969, + "learning_rate": 5e-06, + "loss": 1.3946, + "num_input_tokens_seen": 103342160, + "step": 818 + }, + { + "epoch": 0.20983774770730457, + "loss": 1.3125784397125244, + "loss_ce": 0.0015433471417054534, + "loss_iou": 0.59375, + "loss_num": 0.0247802734375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 103342160, + "step": 818 + }, + { + "epoch": 0.21009427307124992, + "grad_norm": 53.027748107910156, + "learning_rate": 5e-06, + "loss": 1.3837, + "num_input_tokens_seen": 103468968, + "step": 819 + }, + { + "epoch": 0.21009427307124992, + "loss": 1.3300693035125732, + "loss_ce": 0.0009676225599832833, + "loss_iou": 0.6171875, + "loss_num": 0.019287109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 103468968, + "step": 819 + }, + { + "epoch": 0.21035079843519527, + "grad_norm": 79.68496704101562, + "learning_rate": 5e-06, + "loss": 1.2889, + "num_input_tokens_seen": 103596460, + "step": 820 + }, + { + "epoch": 0.21035079843519527, + "loss": 1.4892619848251343, + "loss_ce": 0.0009806393645703793, + "loss_iou": 0.67578125, + "loss_num": 0.0272216796875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 103596460, + "step": 820 + }, + { + "epoch": 0.21060732379914063, + "grad_norm": 45.95759201049805, + "learning_rate": 5e-06, + "loss": 1.5171, + "num_input_tokens_seen": 103722560, + "step": 821 + }, + { + "epoch": 0.21060732379914063, + "loss": 1.4734864234924316, + "loss_ce": 0.0057129324413836, + "loss_iou": 0.66796875, + "loss_num": 0.026123046875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 103722560, + "step": 821 + }, + { + "epoch": 0.210863849163086, + "grad_norm": 43.79263687133789, + "learning_rate": 5e-06, + "loss": 1.3843, + "num_input_tokens_seen": 103848852, + "step": 822 + }, + { + "epoch": 0.210863849163086, + "loss": 1.3302783966064453, + "loss_ce": 0.0036182901822030544, + "loss_iou": 0.62109375, + "loss_num": 0.0162353515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 103848852, + "step": 822 + }, + { + "epoch": 0.21112037452703136, + "grad_norm": 37.63969421386719, + "learning_rate": 5e-06, + "loss": 1.3906, + "num_input_tokens_seen": 103975516, + "step": 823 + }, + { + "epoch": 0.21112037452703136, + "loss": 1.3655592203140259, + "loss_ce": 0.0003248662978876382, + "loss_iou": 0.63671875, + "loss_num": 0.017822265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 103975516, + "step": 823 + }, + { + "epoch": 0.21137689989097672, + "grad_norm": 43.72939682006836, + "learning_rate": 5e-06, + "loss": 1.3723, + "num_input_tokens_seen": 104102344, + "step": 824 + }, + { + "epoch": 0.21137689989097672, + "loss": 1.3853837251663208, + "loss_ce": 0.0025712454225867987, + "loss_iou": 0.62890625, + "loss_num": 0.0252685546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 104102344, + "step": 824 + }, + { + "epoch": 0.21163342525492207, + "grad_norm": 43.155025482177734, + "learning_rate": 5e-06, + "loss": 1.48, + "num_input_tokens_seen": 104227936, + "step": 825 + }, + { + "epoch": 0.21163342525492207, + "loss": 1.3939366340637207, + "loss_ce": 0.0023349791299551725, + "loss_iou": 0.640625, + "loss_num": 0.022705078125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 104227936, + "step": 825 + }, + { + "epoch": 0.21188995061886745, + "grad_norm": 63.234066009521484, + "learning_rate": 5e-06, + "loss": 1.2369, + "num_input_tokens_seen": 104355140, + "step": 826 + }, + { + "epoch": 0.21188995061886745, + "loss": 1.2037829160690308, + "loss_ce": 0.0011461504036560655, + "loss_iou": 0.55859375, + "loss_num": 0.0172119140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 104355140, + "step": 826 + }, + { + "epoch": 0.2121464759828128, + "grad_norm": 64.97721099853516, + "learning_rate": 5e-06, + "loss": 1.2823, + "num_input_tokens_seen": 104481824, + "step": 827 + }, + { + "epoch": 0.2121464759828128, + "loss": 1.1754415035247803, + "loss_ce": 0.0006368473987095058, + "loss_iou": 0.55078125, + "loss_num": 0.01458740234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 104481824, + "step": 827 + }, + { + "epoch": 0.21240300134675816, + "grad_norm": 72.48421478271484, + "learning_rate": 5e-06, + "loss": 1.4317, + "num_input_tokens_seen": 104608260, + "step": 828 + }, + { + "epoch": 0.21240300134675816, + "loss": 1.3465311527252197, + "loss_ce": 0.0027811911422759295, + "loss_iou": 0.62109375, + "loss_num": 0.0201416015625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 104608260, + "step": 828 + }, + { + "epoch": 0.2126595267107035, + "grad_norm": 41.29340744018555, + "learning_rate": 5e-06, + "loss": 1.3899, + "num_input_tokens_seen": 104735052, + "step": 829 + }, + { + "epoch": 0.2126595267107035, + "loss": 1.2978742122650146, + "loss_ce": 0.0009992625564336777, + "loss_iou": 0.58984375, + "loss_num": 0.02294921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 104735052, + "step": 829 + }, + { + "epoch": 0.2129160520746489, + "grad_norm": 46.55683898925781, + "learning_rate": 5e-06, + "loss": 1.2984, + "num_input_tokens_seen": 104862176, + "step": 830 + }, + { + "epoch": 0.2129160520746489, + "loss": 1.2739205360412598, + "loss_ce": 0.0009714420302771032, + "loss_iou": 0.59375, + "loss_num": 0.017333984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 104862176, + "step": 830 + }, + { + "epoch": 0.21317257743859425, + "grad_norm": 55.45515823364258, + "learning_rate": 5e-06, + "loss": 1.5183, + "num_input_tokens_seen": 104989212, + "step": 831 + }, + { + "epoch": 0.21317257743859425, + "loss": 1.397540807723999, + "loss_ce": 0.0010564766125753522, + "loss_iou": 0.65234375, + "loss_num": 0.018798828125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 104989212, + "step": 831 + }, + { + "epoch": 0.2134291028025396, + "grad_norm": 48.31146240234375, + "learning_rate": 5e-06, + "loss": 1.2856, + "num_input_tokens_seen": 105115396, + "step": 832 + }, + { + "epoch": 0.2134291028025396, + "loss": 1.3623230457305908, + "loss_ce": 0.0009949113009497523, + "loss_iou": 0.63671875, + "loss_num": 0.01806640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 105115396, + "step": 832 + }, + { + "epoch": 0.21368562816648495, + "grad_norm": 47.06501770019531, + "learning_rate": 5e-06, + "loss": 1.3505, + "num_input_tokens_seen": 105241008, + "step": 833 + }, + { + "epoch": 0.21368562816648495, + "loss": 1.1678062677383423, + "loss_ce": 0.004231990315020084, + "loss_iou": 0.54296875, + "loss_num": 0.015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 105241008, + "step": 833 + }, + { + "epoch": 0.21394215353043033, + "grad_norm": 91.97337341308594, + "learning_rate": 5e-06, + "loss": 1.4153, + "num_input_tokens_seen": 105366460, + "step": 834 + }, + { + "epoch": 0.21394215353043033, + "loss": 1.7248039245605469, + "loss_ce": 0.005077243782579899, + "loss_iou": 0.78125, + "loss_num": 0.03076171875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 105366460, + "step": 834 + }, + { + "epoch": 0.2141986788943757, + "grad_norm": 51.19464874267578, + "learning_rate": 5e-06, + "loss": 1.6737, + "num_input_tokens_seen": 105492976, + "step": 835 + }, + { + "epoch": 0.2141986788943757, + "loss": 1.7197816371917725, + "loss_ce": 0.0020082485862076283, + "loss_iou": 0.765625, + "loss_num": 0.037841796875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 105492976, + "step": 835 + }, + { + "epoch": 0.21445520425832104, + "grad_norm": 44.47203826904297, + "learning_rate": 5e-06, + "loss": 1.3802, + "num_input_tokens_seen": 105618980, + "step": 836 + }, + { + "epoch": 0.21445520425832104, + "loss": 1.3062474727630615, + "loss_ce": 0.0010716654360294342, + "loss_iou": 0.58203125, + "loss_num": 0.0284423828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 105618980, + "step": 836 + }, + { + "epoch": 0.2147117296222664, + "grad_norm": 89.10150909423828, + "learning_rate": 5e-06, + "loss": 1.4376, + "num_input_tokens_seen": 105745284, + "step": 837 + }, + { + "epoch": 0.2147117296222664, + "loss": 1.2511318922042847, + "loss_ce": 0.0021083992905914783, + "loss_iou": 0.58203125, + "loss_num": 0.0169677734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 105745284, + "step": 837 + }, + { + "epoch": 0.21496825498621175, + "grad_norm": 46.902748107910156, + "learning_rate": 5e-06, + "loss": 1.5398, + "num_input_tokens_seen": 105871968, + "step": 838 + }, + { + "epoch": 0.21496825498621175, + "loss": 1.6062655448913574, + "loss_ce": 0.0007968974532559514, + "loss_iou": 0.7421875, + "loss_num": 0.024169921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 105871968, + "step": 838 + }, + { + "epoch": 0.21522478035015713, + "grad_norm": 45.26897048950195, + "learning_rate": 5e-06, + "loss": 1.2715, + "num_input_tokens_seen": 105998052, + "step": 839 + }, + { + "epoch": 0.21522478035015713, + "loss": 1.1095643043518066, + "loss_ce": 0.0021425168961286545, + "loss_iou": 0.50390625, + "loss_num": 0.0198974609375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 105998052, + "step": 839 + }, + { + "epoch": 0.21548130571410248, + "grad_norm": 54.874427795410156, + "learning_rate": 5e-06, + "loss": 1.2929, + "num_input_tokens_seen": 106124128, + "step": 840 + }, + { + "epoch": 0.21548130571410248, + "loss": 1.1453044414520264, + "loss_ce": 0.001261463388800621, + "loss_iou": 0.53515625, + "loss_num": 0.01531982421875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 106124128, + "step": 840 + }, + { + "epoch": 0.21573783107804784, + "grad_norm": 55.62739181518555, + "learning_rate": 5e-06, + "loss": 1.4505, + "num_input_tokens_seen": 106251148, + "step": 841 + }, + { + "epoch": 0.21573783107804784, + "loss": 1.4168570041656494, + "loss_ce": 0.007677407935261726, + "loss_iou": 0.640625, + "loss_num": 0.0252685546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 106251148, + "step": 841 + }, + { + "epoch": 0.2159943564419932, + "grad_norm": 72.67060089111328, + "learning_rate": 5e-06, + "loss": 1.2879, + "num_input_tokens_seen": 106377760, + "step": 842 + }, + { + "epoch": 0.2159943564419932, + "loss": 1.3242168426513672, + "loss_ce": 0.00048632241669110954, + "loss_iou": 0.61328125, + "loss_num": 0.0203857421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 106377760, + "step": 842 + }, + { + "epoch": 0.21625088180593857, + "grad_norm": 49.955142974853516, + "learning_rate": 5e-06, + "loss": 1.3913, + "num_input_tokens_seen": 106503728, + "step": 843 + }, + { + "epoch": 0.21625088180593857, + "loss": 1.4327881336212158, + "loss_ce": 0.0031006329227238894, + "loss_iou": 0.65625, + "loss_num": 0.0234375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 106503728, + "step": 843 + }, + { + "epoch": 0.21650740716988393, + "grad_norm": 54.71107864379883, + "learning_rate": 5e-06, + "loss": 1.1264, + "num_input_tokens_seen": 106630604, + "step": 844 + }, + { + "epoch": 0.21650740716988393, + "loss": 1.162672996520996, + "loss_ce": 0.0010519400238990784, + "loss_iou": 0.55078125, + "loss_num": 0.01165771484375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 106630604, + "step": 844 + }, + { + "epoch": 0.21676393253382928, + "grad_norm": 96.05371856689453, + "learning_rate": 5e-06, + "loss": 1.5255, + "num_input_tokens_seen": 106757264, + "step": 845 + }, + { + "epoch": 0.21676393253382928, + "loss": 1.250503420829773, + "loss_ce": 0.004409621469676495, + "loss_iou": 0.57421875, + "loss_num": 0.0189208984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 106757264, + "step": 845 + }, + { + "epoch": 0.21702045789777463, + "grad_norm": 51.8949089050293, + "learning_rate": 5e-06, + "loss": 1.4789, + "num_input_tokens_seen": 106885472, + "step": 846 + }, + { + "epoch": 0.21702045789777463, + "loss": 1.2629574537277222, + "loss_ce": 0.0027034739032387733, + "loss_iou": 0.58203125, + "loss_num": 0.019775390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 106885472, + "step": 846 + }, + { + "epoch": 0.21727698326172, + "grad_norm": 47.95415496826172, + "learning_rate": 5e-06, + "loss": 1.298, + "num_input_tokens_seen": 107012156, + "step": 847 + }, + { + "epoch": 0.21727698326172, + "loss": 1.4818785190582275, + "loss_ce": 0.0014097160892561078, + "loss_iou": 0.65625, + "loss_num": 0.033203125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 107012156, + "step": 847 + }, + { + "epoch": 0.21753350862566537, + "grad_norm": 37.76900100708008, + "learning_rate": 5e-06, + "loss": 1.2597, + "num_input_tokens_seen": 107137948, + "step": 848 + }, + { + "epoch": 0.21753350862566537, + "loss": 1.111304521560669, + "loss_ce": 0.003394325729459524, + "loss_iou": 0.51953125, + "loss_num": 0.0135498046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 107137948, + "step": 848 + }, + { + "epoch": 0.21779003398961072, + "grad_norm": 38.0576286315918, + "learning_rate": 5e-06, + "loss": 1.419, + "num_input_tokens_seen": 107264188, + "step": 849 + }, + { + "epoch": 0.21779003398961072, + "loss": 1.7798386812210083, + "loss_ce": 0.003471519099548459, + "loss_iou": 0.76171875, + "loss_num": 0.05029296875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 107264188, + "step": 849 + }, + { + "epoch": 0.21804655935355607, + "grad_norm": 70.96070861816406, + "learning_rate": 5e-06, + "loss": 1.422, + "num_input_tokens_seen": 107391188, + "step": 850 + }, + { + "epoch": 0.21804655935355607, + "loss": 1.4808650016784668, + "loss_ce": 0.0013728067278862, + "loss_iou": 0.703125, + "loss_num": 0.01434326171875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 107391188, + "step": 850 + }, + { + "epoch": 0.21830308471750146, + "grad_norm": 45.92108154296875, + "learning_rate": 5e-06, + "loss": 1.5359, + "num_input_tokens_seen": 107516540, + "step": 851 + }, + { + "epoch": 0.21830308471750146, + "loss": 1.464888572692871, + "loss_ce": 0.0019980687648057938, + "loss_iou": 0.66796875, + "loss_num": 0.0245361328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 107516540, + "step": 851 + }, + { + "epoch": 0.2185596100814468, + "grad_norm": 26.80365753173828, + "learning_rate": 5e-06, + "loss": 1.3157, + "num_input_tokens_seen": 107641844, + "step": 852 + }, + { + "epoch": 0.2185596100814468, + "loss": 1.4777626991271973, + "loss_ce": 0.002176663838326931, + "loss_iou": 0.6484375, + "loss_num": 0.035400390625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 107641844, + "step": 852 + }, + { + "epoch": 0.21881613544539216, + "grad_norm": 34.28759765625, + "learning_rate": 5e-06, + "loss": 1.2558, + "num_input_tokens_seen": 107770012, + "step": 853 + }, + { + "epoch": 0.21881613544539216, + "loss": 1.2734986543655396, + "loss_ce": 0.0025026067160069942, + "loss_iou": 0.5859375, + "loss_num": 0.0191650390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 107770012, + "step": 853 + }, + { + "epoch": 0.21907266080933752, + "grad_norm": 47.869895935058594, + "learning_rate": 5e-06, + "loss": 1.2348, + "num_input_tokens_seen": 107896956, + "step": 854 + }, + { + "epoch": 0.21907266080933752, + "loss": 1.1121585369110107, + "loss_ce": 0.0008303733193315566, + "loss_iou": 0.52734375, + "loss_num": 0.01190185546875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 107896956, + "step": 854 + }, + { + "epoch": 0.21932918617328287, + "grad_norm": 80.51130676269531, + "learning_rate": 5e-06, + "loss": 1.4419, + "num_input_tokens_seen": 108023328, + "step": 855 + }, + { + "epoch": 0.21932918617328287, + "loss": 1.328608512878418, + "loss_ce": 0.0019484497606754303, + "loss_iou": 0.61328125, + "loss_num": 0.0201416015625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 108023328, + "step": 855 + }, + { + "epoch": 0.21958571153722825, + "grad_norm": 53.079898834228516, + "learning_rate": 5e-06, + "loss": 1.41, + "num_input_tokens_seen": 108149916, + "step": 856 + }, + { + "epoch": 0.21958571153722825, + "loss": 1.3083007335662842, + "loss_ce": 0.0016601296374574304, + "loss_iou": 0.58984375, + "loss_num": 0.0252685546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 108149916, + "step": 856 + }, + { + "epoch": 0.2198422369011736, + "grad_norm": 44.758968353271484, + "learning_rate": 5e-06, + "loss": 1.2811, + "num_input_tokens_seen": 108276980, + "step": 857 + }, + { + "epoch": 0.2198422369011736, + "loss": 1.339543342590332, + "loss_ce": 0.0011643850011751056, + "loss_iou": 0.6328125, + "loss_num": 0.0152587890625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 108276980, + "step": 857 + }, + { + "epoch": 0.22009876226511896, + "grad_norm": 39.31000518798828, + "learning_rate": 5e-06, + "loss": 1.3571, + "num_input_tokens_seen": 108403728, + "step": 858 + }, + { + "epoch": 0.22009876226511896, + "loss": 1.3346402645111084, + "loss_ce": 0.005050357896834612, + "loss_iou": 0.58984375, + "loss_num": 0.0303955078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 108403728, + "step": 858 + }, + { + "epoch": 0.2203552876290643, + "grad_norm": 71.66879272460938, + "learning_rate": 5e-06, + "loss": 1.262, + "num_input_tokens_seen": 108529600, + "step": 859 + }, + { + "epoch": 0.2203552876290643, + "loss": 1.2392791509628296, + "loss_ce": 0.00099792773835361, + "loss_iou": 0.58203125, + "loss_num": 0.01446533203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 108529600, + "step": 859 + }, + { + "epoch": 0.2206118129930097, + "grad_norm": 62.0504264831543, + "learning_rate": 5e-06, + "loss": 1.4328, + "num_input_tokens_seen": 108656260, + "step": 860 + }, + { + "epoch": 0.2206118129930097, + "loss": 1.4119822978973389, + "loss_ce": 0.0037791808135807514, + "loss_iou": 0.6328125, + "loss_num": 0.028076171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 108656260, + "step": 860 + }, + { + "epoch": 0.22086833835695505, + "grad_norm": 77.06497192382812, + "learning_rate": 5e-06, + "loss": 1.34, + "num_input_tokens_seen": 108784348, + "step": 861 + }, + { + "epoch": 0.22086833835695505, + "loss": 1.4308797121047974, + "loss_ce": 0.001192188821732998, + "loss_iou": 0.66796875, + "loss_num": 0.0185546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 108784348, + "step": 861 + }, + { + "epoch": 0.2211248637209004, + "grad_norm": 51.25062942504883, + "learning_rate": 5e-06, + "loss": 1.5666, + "num_input_tokens_seen": 108910704, + "step": 862 + }, + { + "epoch": 0.2211248637209004, + "loss": 1.519243597984314, + "loss_ce": 0.0006889344658702612, + "loss_iou": 0.6875, + "loss_num": 0.028564453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 108910704, + "step": 862 + }, + { + "epoch": 0.22138138908484575, + "grad_norm": 42.317352294921875, + "learning_rate": 5e-06, + "loss": 1.3336, + "num_input_tokens_seen": 109035996, + "step": 863 + }, + { + "epoch": 0.22138138908484575, + "loss": 1.2780342102050781, + "loss_ce": 0.0016669936012476683, + "loss_iou": 0.58984375, + "loss_num": 0.01904296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 109035996, + "step": 863 + }, + { + "epoch": 0.22163791444879113, + "grad_norm": 31.0533390045166, + "learning_rate": 5e-06, + "loss": 1.3345, + "num_input_tokens_seen": 109161060, + "step": 864 + }, + { + "epoch": 0.22163791444879113, + "loss": 1.3667558431625366, + "loss_ce": 0.001521471654996276, + "loss_iou": 0.6171875, + "loss_num": 0.0257568359375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 109161060, + "step": 864 + }, + { + "epoch": 0.2218944398127365, + "grad_norm": 39.44700622558594, + "learning_rate": 5e-06, + "loss": 1.2082, + "num_input_tokens_seen": 109287920, + "step": 865 + }, + { + "epoch": 0.2218944398127365, + "loss": 1.478264331817627, + "loss_ce": 0.002678437391296029, + "loss_iou": 0.67578125, + "loss_num": 0.02490234375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 109287920, + "step": 865 + }, + { + "epoch": 0.22215096517668184, + "grad_norm": 58.90821075439453, + "learning_rate": 5e-06, + "loss": 1.4515, + "num_input_tokens_seen": 109414996, + "step": 866 + }, + { + "epoch": 0.22215096517668184, + "loss": 1.3711152076721191, + "loss_ce": 0.0009980072500184178, + "loss_iou": 0.625, + "loss_num": 0.02392578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 109414996, + "step": 866 + }, + { + "epoch": 0.2224074905406272, + "grad_norm": 42.269081115722656, + "learning_rate": 5e-06, + "loss": 1.2681, + "num_input_tokens_seen": 109538420, + "step": 867 + }, + { + "epoch": 0.2224074905406272, + "loss": 1.471693754196167, + "loss_ce": 0.000990603817626834, + "loss_iou": 0.671875, + "loss_num": 0.025146484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 109538420, + "step": 867 + }, + { + "epoch": 0.22266401590457258, + "grad_norm": 38.11931610107422, + "learning_rate": 5e-06, + "loss": 1.192, + "num_input_tokens_seen": 109663540, + "step": 868 + }, + { + "epoch": 0.22266401590457258, + "loss": 1.1448726654052734, + "loss_ce": 0.0022946279495954514, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 109663540, + "step": 868 + }, + { + "epoch": 0.22292054126851793, + "grad_norm": 50.76338577270508, + "learning_rate": 5e-06, + "loss": 1.1968, + "num_input_tokens_seen": 109790684, + "step": 869 + }, + { + "epoch": 0.22292054126851793, + "loss": 1.1969025135040283, + "loss_ce": 0.0054962593130767345, + "loss_iou": 0.5625, + "loss_num": 0.0126953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 109790684, + "step": 869 + }, + { + "epoch": 0.22317706663246328, + "grad_norm": 118.55126190185547, + "learning_rate": 5e-06, + "loss": 1.4395, + "num_input_tokens_seen": 109917928, + "step": 870 + }, + { + "epoch": 0.22317706663246328, + "loss": 1.4637610912322998, + "loss_ce": 0.0018470440991222858, + "loss_iou": 0.671875, + "loss_num": 0.024169921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 109917928, + "step": 870 + }, + { + "epoch": 0.22343359199640864, + "grad_norm": 43.04262161254883, + "learning_rate": 5e-06, + "loss": 1.6447, + "num_input_tokens_seen": 110044084, + "step": 871 + }, + { + "epoch": 0.22343359199640864, + "loss": 1.7243461608886719, + "loss_ce": 0.002666470594704151, + "loss_iou": 0.76171875, + "loss_num": 0.0400390625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 110044084, + "step": 871 + }, + { + "epoch": 0.22369011736035402, + "grad_norm": 36.56740951538086, + "learning_rate": 5e-06, + "loss": 1.4981, + "num_input_tokens_seen": 110169636, + "step": 872 + }, + { + "epoch": 0.22369011736035402, + "loss": 1.4007444381713867, + "loss_ce": 0.0013303777668625116, + "loss_iou": 0.63671875, + "loss_num": 0.0247802734375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 110169636, + "step": 872 + }, + { + "epoch": 0.22394664272429937, + "grad_norm": 46.4896354675293, + "learning_rate": 5e-06, + "loss": 1.3143, + "num_input_tokens_seen": 110295380, + "step": 873 + }, + { + "epoch": 0.22394664272429937, + "loss": 1.302603006362915, + "loss_ce": 0.0008451527683064342, + "loss_iou": 0.6015625, + "loss_num": 0.019775390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 110295380, + "step": 873 + }, + { + "epoch": 0.22420316808824473, + "grad_norm": 42.694236755371094, + "learning_rate": 5e-06, + "loss": 1.3806, + "num_input_tokens_seen": 110423364, + "step": 874 + }, + { + "epoch": 0.22420316808824473, + "loss": 1.3460360765457153, + "loss_ce": 0.0022860628087073565, + "loss_iou": 0.625, + "loss_num": 0.0184326171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 110423364, + "step": 874 + }, + { + "epoch": 0.22445969345219008, + "grad_norm": 106.21011352539062, + "learning_rate": 5e-06, + "loss": 1.3139, + "num_input_tokens_seen": 110550520, + "step": 875 + }, + { + "epoch": 0.22445969345219008, + "loss": 1.1505510807037354, + "loss_ce": 0.00260187778621912, + "loss_iou": 0.546875, + "loss_num": 0.0113525390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 110550520, + "step": 875 + }, + { + "epoch": 0.22471621881613543, + "grad_norm": 188.66481018066406, + "learning_rate": 5e-06, + "loss": 1.5292, + "num_input_tokens_seen": 110676896, + "step": 876 + }, + { + "epoch": 0.22471621881613543, + "loss": 1.5147008895874023, + "loss_ce": 0.005911848973482847, + "loss_iou": 0.67578125, + "loss_num": 0.03173828125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 110676896, + "step": 876 + }, + { + "epoch": 0.2249727441800808, + "grad_norm": 42.38863754272461, + "learning_rate": 5e-06, + "loss": 1.3636, + "num_input_tokens_seen": 110803728, + "step": 877 + }, + { + "epoch": 0.2249727441800808, + "loss": 1.4267995357513428, + "loss_ce": 0.0010182390687987208, + "loss_iou": 0.65625, + "loss_num": 0.0235595703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 110803728, + "step": 877 + }, + { + "epoch": 0.22522926954402617, + "grad_norm": 32.36529541015625, + "learning_rate": 5e-06, + "loss": 1.2338, + "num_input_tokens_seen": 110930224, + "step": 878 + }, + { + "epoch": 0.22522926954402617, + "loss": 1.123304009437561, + "loss_ce": 0.005139881744980812, + "loss_iou": 0.51953125, + "loss_num": 0.0164794921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 110930224, + "step": 878 + }, + { + "epoch": 0.22548579490797152, + "grad_norm": 72.23925018310547, + "learning_rate": 5e-06, + "loss": 1.3108, + "num_input_tokens_seen": 111057324, + "step": 879 + }, + { + "epoch": 0.22548579490797152, + "loss": 1.4704192876815796, + "loss_ce": 0.0016692212084308267, + "loss_iou": 0.671875, + "loss_num": 0.024658203125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 111057324, + "step": 879 + }, + { + "epoch": 0.22574232027191687, + "grad_norm": 51.39903259277344, + "learning_rate": 5e-06, + "loss": 1.4156, + "num_input_tokens_seen": 111184256, + "step": 880 + }, + { + "epoch": 0.22574232027191687, + "loss": 1.5473936796188354, + "loss_ce": 0.0024719019420444965, + "loss_iou": 0.7109375, + "loss_num": 0.0244140625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 111184256, + "step": 880 + }, + { + "epoch": 0.22599884563586226, + "grad_norm": 30.70134735107422, + "learning_rate": 5e-06, + "loss": 1.3595, + "num_input_tokens_seen": 111309196, + "step": 881 + }, + { + "epoch": 0.22599884563586226, + "loss": 1.4198918342590332, + "loss_ce": 0.005829379893839359, + "loss_iou": 0.625, + "loss_num": 0.031982421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 111309196, + "step": 881 + }, + { + "epoch": 0.2262553709998076, + "grad_norm": 31.428083419799805, + "learning_rate": 5e-06, + "loss": 1.2574, + "num_input_tokens_seen": 111435944, + "step": 882 + }, + { + "epoch": 0.2262553709998076, + "loss": 1.2775641679763794, + "loss_ce": 0.003150083590298891, + "loss_iou": 0.5859375, + "loss_num": 0.02001953125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 111435944, + "step": 882 + }, + { + "epoch": 0.22651189636375296, + "grad_norm": 76.35772705078125, + "learning_rate": 5e-06, + "loss": 1.3322, + "num_input_tokens_seen": 111563032, + "step": 883 + }, + { + "epoch": 0.22651189636375296, + "loss": 1.3312240839004517, + "loss_ce": 0.0016342223389074206, + "loss_iou": 0.61328125, + "loss_num": 0.02001953125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 111563032, + "step": 883 + }, + { + "epoch": 0.22676842172769832, + "grad_norm": 56.5334587097168, + "learning_rate": 5e-06, + "loss": 1.4896, + "num_input_tokens_seen": 111690080, + "step": 884 + }, + { + "epoch": 0.22676842172769832, + "loss": 1.4997889995574951, + "loss_ce": 0.001742122694849968, + "loss_iou": 0.6796875, + "loss_num": 0.0284423828125, + "loss_xval": 1.5, + "num_input_tokens_seen": 111690080, + "step": 884 + }, + { + "epoch": 0.2270249470916437, + "grad_norm": 28.566360473632812, + "learning_rate": 5e-06, + "loss": 1.1853, + "num_input_tokens_seen": 111815640, + "step": 885 + }, + { + "epoch": 0.2270249470916437, + "loss": 1.199195384979248, + "loss_ce": 0.00046489731175825, + "loss_iou": 0.55859375, + "loss_num": 0.0157470703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 111815640, + "step": 885 + }, + { + "epoch": 0.22728147245558905, + "grad_norm": 31.808095932006836, + "learning_rate": 5e-06, + "loss": 1.2149, + "num_input_tokens_seen": 111941608, + "step": 886 + }, + { + "epoch": 0.22728147245558905, + "loss": 1.158692717552185, + "loss_ce": 0.0029309988021850586, + "loss_iou": 0.5390625, + "loss_num": 0.0159912109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 111941608, + "step": 886 + }, + { + "epoch": 0.2275379978195344, + "grad_norm": 35.20946502685547, + "learning_rate": 5e-06, + "loss": 1.3034, + "num_input_tokens_seen": 112068000, + "step": 887 + }, + { + "epoch": 0.2275379978195344, + "loss": 1.1006805896759033, + "loss_ce": 0.0025360831059515476, + "loss_iou": 0.50390625, + "loss_num": 0.018310546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 112068000, + "step": 887 + }, + { + "epoch": 0.22779452318347976, + "grad_norm": 46.20315170288086, + "learning_rate": 5e-06, + "loss": 1.3607, + "num_input_tokens_seen": 112193772, + "step": 888 + }, + { + "epoch": 0.22779452318347976, + "loss": 1.375450849533081, + "loss_ce": 0.0053336480632424355, + "loss_iou": 0.6328125, + "loss_num": 0.021484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 112193772, + "step": 888 + }, + { + "epoch": 0.22805104854742514, + "grad_norm": 50.60459899902344, + "learning_rate": 5e-06, + "loss": 1.327, + "num_input_tokens_seen": 112318384, + "step": 889 + }, + { + "epoch": 0.22805104854742514, + "loss": 1.34164559841156, + "loss_ce": 0.0013135320041328669, + "loss_iou": 0.609375, + "loss_num": 0.0242919921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 112318384, + "step": 889 + }, + { + "epoch": 0.2283075739113705, + "grad_norm": 63.13187789916992, + "learning_rate": 5e-06, + "loss": 1.3183, + "num_input_tokens_seen": 112444360, + "step": 890 + }, + { + "epoch": 0.2283075739113705, + "loss": 1.4495114088058472, + "loss_ce": 0.002245801966637373, + "loss_iou": 0.6640625, + "loss_num": 0.024169921875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 112444360, + "step": 890 + }, + { + "epoch": 0.22856409927531585, + "grad_norm": 64.76119995117188, + "learning_rate": 5e-06, + "loss": 1.4067, + "num_input_tokens_seen": 112571028, + "step": 891 + }, + { + "epoch": 0.22856409927531585, + "loss": 1.3949520587921143, + "loss_ce": 0.004327083937823772, + "loss_iou": 0.6328125, + "loss_num": 0.025634765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 112571028, + "step": 891 + }, + { + "epoch": 0.2288206246392612, + "grad_norm": 47.076847076416016, + "learning_rate": 5e-06, + "loss": 1.3417, + "num_input_tokens_seen": 112697236, + "step": 892 + }, + { + "epoch": 0.2288206246392612, + "loss": 1.2416125535964966, + "loss_ce": 0.0018663909286260605, + "loss_iou": 0.57421875, + "loss_num": 0.01806640625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 112697236, + "step": 892 + }, + { + "epoch": 0.22907715000320655, + "grad_norm": 61.032447814941406, + "learning_rate": 5e-06, + "loss": 1.3628, + "num_input_tokens_seen": 112824148, + "step": 893 + }, + { + "epoch": 0.22907715000320655, + "loss": 1.4645155668258667, + "loss_ce": 0.004554663319140673, + "loss_iou": 0.6484375, + "loss_num": 0.032470703125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 112824148, + "step": 893 + }, + { + "epoch": 0.22933367536715193, + "grad_norm": 51.111209869384766, + "learning_rate": 5e-06, + "loss": 1.4243, + "num_input_tokens_seen": 112950352, + "step": 894 + }, + { + "epoch": 0.22933367536715193, + "loss": 1.4502856731414795, + "loss_ce": 0.0020435431506484747, + "loss_iou": 0.6796875, + "loss_num": 0.017822265625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 112950352, + "step": 894 + }, + { + "epoch": 0.2295902007310973, + "grad_norm": 58.70512771606445, + "learning_rate": 5e-06, + "loss": 1.2075, + "num_input_tokens_seen": 113076876, + "step": 895 + }, + { + "epoch": 0.2295902007310973, + "loss": 1.20267915725708, + "loss_ce": 0.0010190506000071764, + "loss_iou": 0.57421875, + "loss_num": 0.0107421875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 113076876, + "step": 895 + }, + { + "epoch": 0.22984672609504264, + "grad_norm": 85.51910400390625, + "learning_rate": 5e-06, + "loss": 1.2902, + "num_input_tokens_seen": 113202840, + "step": 896 + }, + { + "epoch": 0.22984672609504264, + "loss": 1.1747854948043823, + "loss_ce": 0.0009574600262567401, + "loss_iou": 0.5625, + "loss_num": 0.01025390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 113202840, + "step": 896 + }, + { + "epoch": 0.230103251458988, + "grad_norm": 49.33277130126953, + "learning_rate": 5e-06, + "loss": 1.3604, + "num_input_tokens_seen": 113328056, + "step": 897 + }, + { + "epoch": 0.230103251458988, + "loss": 1.2637230157852173, + "loss_ce": 0.0005393297178670764, + "loss_iou": 0.58203125, + "loss_num": 0.0206298828125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 113328056, + "step": 897 + }, + { + "epoch": 0.23035977682293338, + "grad_norm": 52.3984489440918, + "learning_rate": 5e-06, + "loss": 1.3301, + "num_input_tokens_seen": 113454580, + "step": 898 + }, + { + "epoch": 0.23035977682293338, + "loss": 1.3169317245483398, + "loss_ce": 0.0034551904536783695, + "loss_iou": 0.60546875, + "loss_num": 0.0203857421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 113454580, + "step": 898 + }, + { + "epoch": 0.23061630218687873, + "grad_norm": 36.869808197021484, + "learning_rate": 5e-06, + "loss": 1.3663, + "num_input_tokens_seen": 113580796, + "step": 899 + }, + { + "epoch": 0.23061630218687873, + "loss": 1.4155957698822021, + "loss_ce": 0.0005568009219132364, + "loss_iou": 0.63671875, + "loss_num": 0.028564453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 113580796, + "step": 899 + }, + { + "epoch": 0.23087282755082408, + "grad_norm": 49.00529098510742, + "learning_rate": 5e-06, + "loss": 1.4235, + "num_input_tokens_seen": 113707456, + "step": 900 + }, + { + "epoch": 0.23087282755082408, + "loss": 1.423506259918213, + "loss_ce": 0.002119549782946706, + "loss_iou": 0.640625, + "loss_num": 0.027099609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 113707456, + "step": 900 + }, + { + "epoch": 0.23112935291476944, + "grad_norm": 60.6417121887207, + "learning_rate": 5e-06, + "loss": 1.3792, + "num_input_tokens_seen": 113833424, + "step": 901 + }, + { + "epoch": 0.23112935291476944, + "loss": 1.5023759603500366, + "loss_ce": 0.002376062795519829, + "loss_iou": 0.6875, + "loss_num": 0.025634765625, + "loss_xval": 1.5, + "num_input_tokens_seen": 113833424, + "step": 901 + }, + { + "epoch": 0.23138587827871482, + "grad_norm": 48.91705322265625, + "learning_rate": 5e-06, + "loss": 1.5556, + "num_input_tokens_seen": 113960460, + "step": 902 + }, + { + "epoch": 0.23138587827871482, + "loss": 1.4730918407440186, + "loss_ce": 0.006295007653534412, + "loss_iou": 0.65625, + "loss_num": 0.0303955078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 113960460, + "step": 902 + }, + { + "epoch": 0.23164240364266017, + "grad_norm": 50.46155548095703, + "learning_rate": 5e-06, + "loss": 1.1693, + "num_input_tokens_seen": 114086824, + "step": 903 + }, + { + "epoch": 0.23164240364266017, + "loss": 1.1526782512664795, + "loss_ce": 0.001310959691181779, + "loss_iou": 0.546875, + "loss_num": 0.0118408203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 114086824, + "step": 903 + }, + { + "epoch": 0.23189892900660553, + "grad_norm": 54.59313201904297, + "learning_rate": 5e-06, + "loss": 1.4334, + "num_input_tokens_seen": 114213796, + "step": 904 + }, + { + "epoch": 0.23189892900660553, + "loss": 1.3652406930923462, + "loss_ce": 0.001471168827265501, + "loss_iou": 0.625, + "loss_num": 0.0228271484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 114213796, + "step": 904 + }, + { + "epoch": 0.23215545437055088, + "grad_norm": 93.16931915283203, + "learning_rate": 5e-06, + "loss": 1.2587, + "num_input_tokens_seen": 114340112, + "step": 905 + }, + { + "epoch": 0.23215545437055088, + "loss": 1.3593249320983887, + "loss_ce": 0.004832704085856676, + "loss_iou": 0.60546875, + "loss_num": 0.0286865234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 114340112, + "step": 905 + }, + { + "epoch": 0.23241197973449626, + "grad_norm": 49.08348846435547, + "learning_rate": 5e-06, + "loss": 1.4423, + "num_input_tokens_seen": 114466964, + "step": 906 + }, + { + "epoch": 0.23241197973449626, + "loss": 1.4324429035186768, + "loss_ce": 0.0017787908436730504, + "loss_iou": 0.66796875, + "loss_num": 0.019287109375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 114466964, + "step": 906 + }, + { + "epoch": 0.2326685050984416, + "grad_norm": 41.74802017211914, + "learning_rate": 5e-06, + "loss": 1.3938, + "num_input_tokens_seen": 114593444, + "step": 907 + }, + { + "epoch": 0.2326685050984416, + "loss": 1.3631118535995483, + "loss_ce": 0.0032484966795891523, + "loss_iou": 0.6015625, + "loss_num": 0.0306396484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 114593444, + "step": 907 + }, + { + "epoch": 0.23292503046238697, + "grad_norm": 40.076210021972656, + "learning_rate": 5e-06, + "loss": 1.2671, + "num_input_tokens_seen": 114719352, + "step": 908 + }, + { + "epoch": 0.23292503046238697, + "loss": 1.4410052299499512, + "loss_ce": 0.0005755225429311395, + "loss_iou": 0.6640625, + "loss_num": 0.02197265625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 114719352, + "step": 908 + }, + { + "epoch": 0.23318155582633232, + "grad_norm": 83.48221588134766, + "learning_rate": 5e-06, + "loss": 1.4485, + "num_input_tokens_seen": 114846156, + "step": 909 + }, + { + "epoch": 0.23318155582633232, + "loss": 1.3747888803482056, + "loss_ce": 0.0007655267836526036, + "loss_iou": 0.65625, + "loss_num": 0.01287841796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 114846156, + "step": 909 + }, + { + "epoch": 0.2334380811902777, + "grad_norm": 54.399654388427734, + "learning_rate": 5e-06, + "loss": 1.4232, + "num_input_tokens_seen": 114972376, + "step": 910 + }, + { + "epoch": 0.2334380811902777, + "loss": 1.487208604812622, + "loss_ce": 0.0013687292812392116, + "loss_iou": 0.68359375, + "loss_num": 0.023681640625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 114972376, + "step": 910 + }, + { + "epoch": 0.23369460655422306, + "grad_norm": 35.098358154296875, + "learning_rate": 5e-06, + "loss": 1.3081, + "num_input_tokens_seen": 115098408, + "step": 911 + }, + { + "epoch": 0.23369460655422306, + "loss": 1.2594969272613525, + "loss_ce": 0.002172773703932762, + "loss_iou": 0.5703125, + "loss_num": 0.0240478515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 115098408, + "step": 911 + }, + { + "epoch": 0.2339511319181684, + "grad_norm": 47.6381950378418, + "learning_rate": 5e-06, + "loss": 1.2513, + "num_input_tokens_seen": 115224764, + "step": 912 + }, + { + "epoch": 0.2339511319181684, + "loss": 1.1750240325927734, + "loss_ce": 0.0011959951370954514, + "loss_iou": 0.55078125, + "loss_num": 0.01409912109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 115224764, + "step": 912 + }, + { + "epoch": 0.23420765728211376, + "grad_norm": 59.1236457824707, + "learning_rate": 5e-06, + "loss": 1.3541, + "num_input_tokens_seen": 115350732, + "step": 913 + }, + { + "epoch": 0.23420765728211376, + "loss": 1.7742127180099487, + "loss_ce": 0.005169817246496677, + "loss_iou": 0.765625, + "loss_num": 0.04736328125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 115350732, + "step": 913 + }, + { + "epoch": 0.23446418264605912, + "grad_norm": 46.39772033691406, + "learning_rate": 5e-06, + "loss": 1.2511, + "num_input_tokens_seen": 115477568, + "step": 914 + }, + { + "epoch": 0.23446418264605912, + "loss": 1.1894718408584595, + "loss_ce": 0.0005069676553830504, + "loss_iou": 0.546875, + "loss_num": 0.018798828125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 115477568, + "step": 914 + }, + { + "epoch": 0.2347207080100045, + "grad_norm": 69.56977844238281, + "learning_rate": 5e-06, + "loss": 1.2961, + "num_input_tokens_seen": 115603948, + "step": 915 + }, + { + "epoch": 0.2347207080100045, + "loss": 1.4186091423034668, + "loss_ce": 0.0016170348972082138, + "loss_iou": 0.63671875, + "loss_num": 0.028564453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 115603948, + "step": 915 + }, + { + "epoch": 0.23497723337394985, + "grad_norm": 48.43756866455078, + "learning_rate": 5e-06, + "loss": 1.473, + "num_input_tokens_seen": 115729652, + "step": 916 + }, + { + "epoch": 0.23497723337394985, + "loss": 1.5034310817718506, + "loss_ce": 0.0005014382768422365, + "loss_iou": 0.69140625, + "loss_num": 0.024169921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 115729652, + "step": 916 + }, + { + "epoch": 0.2352337587378952, + "grad_norm": 42.2809944152832, + "learning_rate": 5e-06, + "loss": 1.3071, + "num_input_tokens_seen": 115856284, + "step": 917 + }, + { + "epoch": 0.2352337587378952, + "loss": 1.4058789014816284, + "loss_ce": 0.004023452755063772, + "loss_iou": 0.640625, + "loss_num": 0.024169921875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 115856284, + "step": 917 + }, + { + "epoch": 0.23549028410184056, + "grad_norm": 90.25298309326172, + "learning_rate": 5e-06, + "loss": 1.3609, + "num_input_tokens_seen": 115982756, + "step": 918 + }, + { + "epoch": 0.23549028410184056, + "loss": 1.307147741317749, + "loss_ce": 0.001483615837059915, + "loss_iou": 0.6015625, + "loss_num": 0.0206298828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 115982756, + "step": 918 + }, + { + "epoch": 0.23574680946578594, + "grad_norm": 47.443450927734375, + "learning_rate": 5e-06, + "loss": 1.5922, + "num_input_tokens_seen": 116109444, + "step": 919 + }, + { + "epoch": 0.23574680946578594, + "loss": 1.695834994316101, + "loss_ce": 0.002475664485245943, + "loss_iou": 0.765625, + "loss_num": 0.0322265625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 116109444, + "step": 919 + }, + { + "epoch": 0.2360033348297313, + "grad_norm": 42.92817687988281, + "learning_rate": 5e-06, + "loss": 1.3072, + "num_input_tokens_seen": 116236084, + "step": 920 + }, + { + "epoch": 0.2360033348297313, + "loss": 1.285005807876587, + "loss_ce": 0.002779158763587475, + "loss_iou": 0.578125, + "loss_num": 0.0245361328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 116236084, + "step": 920 + }, + { + "epoch": 0.23625986019367665, + "grad_norm": 76.16838073730469, + "learning_rate": 5e-06, + "loss": 1.3073, + "num_input_tokens_seen": 116362756, + "step": 921 + }, + { + "epoch": 0.23625986019367665, + "loss": 1.3620072603225708, + "loss_ce": 0.0016556193586438894, + "loss_iou": 0.6171875, + "loss_num": 0.0255126953125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 116362756, + "step": 921 + }, + { + "epoch": 0.236516385557622, + "grad_norm": 54.23655319213867, + "learning_rate": 5e-06, + "loss": 1.4394, + "num_input_tokens_seen": 116489232, + "step": 922 + }, + { + "epoch": 0.236516385557622, + "loss": 1.398358702659607, + "loss_ce": 0.0008977807010523975, + "loss_iou": 0.62890625, + "loss_num": 0.0284423828125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 116489232, + "step": 922 + }, + { + "epoch": 0.23677291092156738, + "grad_norm": 36.16350555419922, + "learning_rate": 5e-06, + "loss": 1.2509, + "num_input_tokens_seen": 116614940, + "step": 923 + }, + { + "epoch": 0.23677291092156738, + "loss": 1.0950191020965576, + "loss_ce": 0.001757304766215384, + "loss_iou": 0.5, + "loss_num": 0.0185546875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 116614940, + "step": 923 + }, + { + "epoch": 0.23702943628551273, + "grad_norm": 51.979312896728516, + "learning_rate": 5e-06, + "loss": 1.2015, + "num_input_tokens_seen": 116740948, + "step": 924 + }, + { + "epoch": 0.23702943628551273, + "loss": 1.086332082748413, + "loss_ce": 0.002836087252944708, + "loss_iou": 0.5, + "loss_num": 0.0164794921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 116740948, + "step": 924 + }, + { + "epoch": 0.2372859616494581, + "grad_norm": 66.16816711425781, + "learning_rate": 5e-06, + "loss": 1.4507, + "num_input_tokens_seen": 116868292, + "step": 925 + }, + { + "epoch": 0.2372859616494581, + "loss": 1.6645116806030273, + "loss_ce": 0.000449151499196887, + "loss_iou": 0.7421875, + "loss_num": 0.03564453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 116868292, + "step": 925 + }, + { + "epoch": 0.23754248701340344, + "grad_norm": 47.48419952392578, + "learning_rate": 5e-06, + "loss": 1.3316, + "num_input_tokens_seen": 116994520, + "step": 926 + }, + { + "epoch": 0.23754248701340344, + "loss": 1.4684863090515137, + "loss_ce": 0.0026660198345780373, + "loss_iou": 0.69140625, + "loss_num": 0.0159912109375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 116994520, + "step": 926 + }, + { + "epoch": 0.23779901237734882, + "grad_norm": 71.9028091430664, + "learning_rate": 5e-06, + "loss": 1.3373, + "num_input_tokens_seen": 117120080, + "step": 927 + }, + { + "epoch": 0.23779901237734882, + "loss": 1.224534273147583, + "loss_ce": 0.00041311001405119896, + "loss_iou": 0.5703125, + "loss_num": 0.0167236328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 117120080, + "step": 927 + }, + { + "epoch": 0.23805553774129418, + "grad_norm": 73.9217529296875, + "learning_rate": 5e-06, + "loss": 1.5309, + "num_input_tokens_seen": 117246264, + "step": 928 + }, + { + "epoch": 0.23805553774129418, + "loss": 1.2888407707214355, + "loss_ce": 0.0007549205329269171, + "loss_iou": 0.59375, + "loss_num": 0.0205078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 117246264, + "step": 928 + }, + { + "epoch": 0.23831206310523953, + "grad_norm": 32.889286041259766, + "learning_rate": 5e-06, + "loss": 1.2808, + "num_input_tokens_seen": 117371752, + "step": 929 + }, + { + "epoch": 0.23831206310523953, + "loss": 1.2564622163772583, + "loss_ce": 0.0010911113349720836, + "loss_iou": 0.5703125, + "loss_num": 0.023193359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 117371752, + "step": 929 + }, + { + "epoch": 0.23856858846918488, + "grad_norm": 52.7745475769043, + "learning_rate": 5e-06, + "loss": 1.476, + "num_input_tokens_seen": 117497356, + "step": 930 + }, + { + "epoch": 0.23856858846918488, + "loss": 1.5290281772613525, + "loss_ce": 0.0011962195858359337, + "loss_iou": 0.66796875, + "loss_num": 0.037841796875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 117497356, + "step": 930 + }, + { + "epoch": 0.23882511383313024, + "grad_norm": 59.719825744628906, + "learning_rate": 5e-06, + "loss": 1.3672, + "num_input_tokens_seen": 117623312, + "step": 931 + }, + { + "epoch": 0.23882511383313024, + "loss": 1.2019193172454834, + "loss_ce": 0.0007475175079889596, + "loss_iou": 0.5703125, + "loss_num": 0.0123291015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 117623312, + "step": 931 + }, + { + "epoch": 0.23908163919707562, + "grad_norm": 55.541954040527344, + "learning_rate": 5e-06, + "loss": 1.4339, + "num_input_tokens_seen": 117748572, + "step": 932 + }, + { + "epoch": 0.23908163919707562, + "loss": 1.7328662872314453, + "loss_ce": 0.0009327193256467581, + "loss_iou": 0.73828125, + "loss_num": 0.05126953125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 117748572, + "step": 932 + }, + { + "epoch": 0.23933816456102097, + "grad_norm": 47.9677734375, + "learning_rate": 5e-06, + "loss": 1.2442, + "num_input_tokens_seen": 117874284, + "step": 933 + }, + { + "epoch": 0.23933816456102097, + "loss": 1.149627685546875, + "loss_ce": 0.0026551554910838604, + "loss_iou": 0.53125, + "loss_num": 0.0162353515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 117874284, + "step": 933 + }, + { + "epoch": 0.23959468992496633, + "grad_norm": 63.40522766113281, + "learning_rate": 5e-06, + "loss": 1.5597, + "num_input_tokens_seen": 118000772, + "step": 934 + }, + { + "epoch": 0.23959468992496633, + "loss": 1.7454923391342163, + "loss_ce": 0.0033048957120627165, + "loss_iou": 0.78515625, + "loss_num": 0.0341796875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 118000772, + "step": 934 + }, + { + "epoch": 0.23985121528891168, + "grad_norm": 42.123023986816406, + "learning_rate": 5e-06, + "loss": 1.2598, + "num_input_tokens_seen": 118127868, + "step": 935 + }, + { + "epoch": 0.23985121528891168, + "loss": 1.318880319595337, + "loss_ce": 0.0014974601799622178, + "loss_iou": 0.60546875, + "loss_num": 0.0216064453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 118127868, + "step": 935 + }, + { + "epoch": 0.24010774065285706, + "grad_norm": 28.688692092895508, + "learning_rate": 5e-06, + "loss": 1.2384, + "num_input_tokens_seen": 118255216, + "step": 936 + }, + { + "epoch": 0.24010774065285706, + "loss": 1.2823504209518433, + "loss_ce": 0.002565225353464484, + "loss_iou": 0.59765625, + "loss_num": 0.0166015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 118255216, + "step": 936 + }, + { + "epoch": 0.2403642660168024, + "grad_norm": 42.86612319946289, + "learning_rate": 5e-06, + "loss": 1.2441, + "num_input_tokens_seen": 118380976, + "step": 937 + }, + { + "epoch": 0.2403642660168024, + "loss": 1.3592746257781982, + "loss_ce": 0.0028293065261095762, + "loss_iou": 0.62109375, + "loss_num": 0.02294921875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 118380976, + "step": 937 + }, + { + "epoch": 0.24062079138074777, + "grad_norm": 60.78129577636719, + "learning_rate": 5e-06, + "loss": 1.3635, + "num_input_tokens_seen": 118506612, + "step": 938 + }, + { + "epoch": 0.24062079138074777, + "loss": 1.2826480865478516, + "loss_ce": 0.0004214321088511497, + "loss_iou": 0.6015625, + "loss_num": 0.0166015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 118506612, + "step": 938 + }, + { + "epoch": 0.24087731674469312, + "grad_norm": 48.75016403198242, + "learning_rate": 5e-06, + "loss": 1.3597, + "num_input_tokens_seen": 118632592, + "step": 939 + }, + { + "epoch": 0.24087731674469312, + "loss": 1.2833774089813232, + "loss_ce": 0.0006625246023759246, + "loss_iou": 0.59765625, + "loss_num": 0.017333984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 118632592, + "step": 939 + }, + { + "epoch": 0.2411338421086385, + "grad_norm": 56.442256927490234, + "learning_rate": 5e-06, + "loss": 1.3694, + "num_input_tokens_seen": 118758636, + "step": 940 + }, + { + "epoch": 0.2411338421086385, + "loss": 1.3086743354797363, + "loss_ce": 0.0025218899827450514, + "loss_iou": 0.60546875, + "loss_num": 0.0186767578125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 118758636, + "step": 940 + }, + { + "epoch": 0.24139036747258386, + "grad_norm": 62.21440887451172, + "learning_rate": 5e-06, + "loss": 1.5033, + "num_input_tokens_seen": 118885372, + "step": 941 + }, + { + "epoch": 0.24139036747258386, + "loss": 1.5830628871917725, + "loss_ce": 0.003961273934692144, + "loss_iou": 0.69921875, + "loss_num": 0.037109375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 118885372, + "step": 941 + }, + { + "epoch": 0.2416468928365292, + "grad_norm": 71.10104370117188, + "learning_rate": 5e-06, + "loss": 1.2683, + "num_input_tokens_seen": 119011208, + "step": 942 + }, + { + "epoch": 0.2416468928365292, + "loss": 1.086031436920166, + "loss_ce": 0.0005821855738759041, + "loss_iou": 0.5078125, + "loss_num": 0.01385498046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 119011208, + "step": 942 + }, + { + "epoch": 0.24190341820047456, + "grad_norm": 38.6158561706543, + "learning_rate": 5e-06, + "loss": 1.433, + "num_input_tokens_seen": 119137268, + "step": 943 + }, + { + "epoch": 0.24190341820047456, + "loss": 1.4961329698562622, + "loss_ce": 0.004922008141875267, + "loss_iou": 0.66796875, + "loss_num": 0.03173828125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 119137268, + "step": 943 + }, + { + "epoch": 0.24215994356441994, + "grad_norm": 36.52344512939453, + "learning_rate": 5e-06, + "loss": 1.2291, + "num_input_tokens_seen": 119263204, + "step": 944 + }, + { + "epoch": 0.24215994356441994, + "loss": 1.370254397392273, + "loss_ce": 0.002578596817329526, + "loss_iou": 0.61328125, + "loss_num": 0.02734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 119263204, + "step": 944 + }, + { + "epoch": 0.2424164689283653, + "grad_norm": 44.93699645996094, + "learning_rate": 5e-06, + "loss": 1.2546, + "num_input_tokens_seen": 119389164, + "step": 945 + }, + { + "epoch": 0.2424164689283653, + "loss": 1.325518012046814, + "loss_ce": 0.0012992597185075283, + "loss_iou": 0.60546875, + "loss_num": 0.0220947265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 119389164, + "step": 945 + }, + { + "epoch": 0.24267299429231065, + "grad_norm": 88.58002471923828, + "learning_rate": 5e-06, + "loss": 1.2912, + "num_input_tokens_seen": 119515940, + "step": 946 + }, + { + "epoch": 0.24267299429231065, + "loss": 1.2593683004379272, + "loss_ce": 0.0005792162264697254, + "loss_iou": 0.58203125, + "loss_num": 0.0189208984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 119515940, + "step": 946 + }, + { + "epoch": 0.242929519656256, + "grad_norm": 55.316993713378906, + "learning_rate": 5e-06, + "loss": 1.454, + "num_input_tokens_seen": 119642432, + "step": 947 + }, + { + "epoch": 0.242929519656256, + "loss": 1.5239250659942627, + "loss_ce": 0.0009758782107383013, + "loss_iou": 0.69140625, + "loss_num": 0.028564453125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 119642432, + "step": 947 + }, + { + "epoch": 0.24318604502020139, + "grad_norm": 44.7547607421875, + "learning_rate": 5e-06, + "loss": 1.2484, + "num_input_tokens_seen": 119769392, + "step": 948 + }, + { + "epoch": 0.24318604502020139, + "loss": 1.219508171081543, + "loss_ce": 0.0017348392866551876, + "loss_iou": 0.5546875, + "loss_num": 0.021728515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 119769392, + "step": 948 + }, + { + "epoch": 0.24344257038414674, + "grad_norm": 82.32073974609375, + "learning_rate": 5e-06, + "loss": 1.2743, + "num_input_tokens_seen": 119896988, + "step": 949 + }, + { + "epoch": 0.24344257038414674, + "loss": 1.3550242185592651, + "loss_ce": 0.007367984391748905, + "loss_iou": 0.6015625, + "loss_num": 0.0294189453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 119896988, + "step": 949 + }, + { + "epoch": 0.2436990957480921, + "grad_norm": 54.977516174316406, + "learning_rate": 5e-06, + "loss": 1.3757, + "num_input_tokens_seen": 120023292, + "step": 950 + }, + { + "epoch": 0.2436990957480921, + "loss": 1.4976322650909424, + "loss_ce": 0.0005620034644380212, + "loss_iou": 0.69140625, + "loss_num": 0.023681640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 120023292, + "step": 950 + }, + { + "epoch": 0.24395562111203745, + "grad_norm": 54.648658752441406, + "learning_rate": 5e-06, + "loss": 1.1634, + "num_input_tokens_seen": 120148804, + "step": 951 + }, + { + "epoch": 0.24395562111203745, + "loss": 1.1707106828689575, + "loss_ce": 0.0037185377441346645, + "loss_iou": 0.546875, + "loss_num": 0.01416015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 120148804, + "step": 951 + }, + { + "epoch": 0.2442121464759828, + "grad_norm": 38.99605178833008, + "learning_rate": 5e-06, + "loss": 1.2874, + "num_input_tokens_seen": 120275464, + "step": 952 + }, + { + "epoch": 0.2442121464759828, + "loss": 1.1615612506866455, + "loss_ce": 0.0033580332528799772, + "loss_iou": 0.52734375, + "loss_num": 0.020751953125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 120275464, + "step": 952 + }, + { + "epoch": 0.24446867183992818, + "grad_norm": 91.07843780517578, + "learning_rate": 5e-06, + "loss": 1.2769, + "num_input_tokens_seen": 120401276, + "step": 953 + }, + { + "epoch": 0.24446867183992818, + "loss": 1.0841130018234253, + "loss_ce": 0.0008610707009211183, + "loss_iou": 0.515625, + "loss_num": 0.0103759765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 120401276, + "step": 953 + }, + { + "epoch": 0.24472519720387353, + "grad_norm": 46.4586181640625, + "learning_rate": 5e-06, + "loss": 1.5765, + "num_input_tokens_seen": 120527980, + "step": 954 + }, + { + "epoch": 0.24472519720387353, + "loss": 1.7352337837219238, + "loss_ce": 0.0018354374915361404, + "loss_iou": 0.78125, + "loss_num": 0.03369140625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 120527980, + "step": 954 + }, + { + "epoch": 0.2449817225678189, + "grad_norm": 58.7501106262207, + "learning_rate": 5e-06, + "loss": 1.1938, + "num_input_tokens_seen": 120653468, + "step": 955 + }, + { + "epoch": 0.2449817225678189, + "loss": 1.2133777141571045, + "loss_ce": 0.001951937098056078, + "loss_iou": 0.57421875, + "loss_num": 0.0130615234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 120653468, + "step": 955 + }, + { + "epoch": 0.24523824793176424, + "grad_norm": 71.12825775146484, + "learning_rate": 5e-06, + "loss": 1.2856, + "num_input_tokens_seen": 120780120, + "step": 956 + }, + { + "epoch": 0.24523824793176424, + "loss": 1.3722120523452759, + "loss_ce": 0.0020948878955096006, + "loss_iou": 0.61328125, + "loss_num": 0.0281982421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 120780120, + "step": 956 + }, + { + "epoch": 0.24549477329570962, + "grad_norm": 46.739830017089844, + "learning_rate": 5e-06, + "loss": 1.2695, + "num_input_tokens_seen": 120906140, + "step": 957 + }, + { + "epoch": 0.24549477329570962, + "loss": 1.3327444791793823, + "loss_ce": 0.0012015195097774267, + "loss_iou": 0.6171875, + "loss_num": 0.02001953125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 120906140, + "step": 957 + }, + { + "epoch": 0.24575129865965498, + "grad_norm": 45.56647491455078, + "learning_rate": 5e-06, + "loss": 1.4023, + "num_input_tokens_seen": 121031824, + "step": 958 + }, + { + "epoch": 0.24575129865965498, + "loss": 1.650170087814331, + "loss_ce": 0.0017324851360172033, + "loss_iou": 0.75, + "loss_num": 0.029541015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 121031824, + "step": 958 + }, + { + "epoch": 0.24600782402360033, + "grad_norm": 68.96953582763672, + "learning_rate": 5e-06, + "loss": 1.2962, + "num_input_tokens_seen": 121157420, + "step": 959 + }, + { + "epoch": 0.24600782402360033, + "loss": 1.2955929040908813, + "loss_ce": 0.0006709928857162595, + "loss_iou": 0.61328125, + "loss_num": 0.0133056640625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 121157420, + "step": 959 + }, + { + "epoch": 0.24626434938754568, + "grad_norm": 50.26723861694336, + "learning_rate": 5e-06, + "loss": 1.3148, + "num_input_tokens_seen": 121282808, + "step": 960 + }, + { + "epoch": 0.24626434938754568, + "loss": 1.5083248615264893, + "loss_ce": 0.0014888246078044176, + "loss_iou": 0.6640625, + "loss_num": 0.035400390625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 121282808, + "step": 960 + }, + { + "epoch": 0.24652087475149106, + "grad_norm": 45.44943618774414, + "learning_rate": 5e-06, + "loss": 1.3118, + "num_input_tokens_seen": 121408376, + "step": 961 + }, + { + "epoch": 0.24652087475149106, + "loss": 1.2075881958007812, + "loss_ce": 0.00348653644323349, + "loss_iou": 0.55859375, + "loss_num": 0.01708984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 121408376, + "step": 961 + }, + { + "epoch": 0.24677740011543642, + "grad_norm": 67.64009857177734, + "learning_rate": 5e-06, + "loss": 1.4012, + "num_input_tokens_seen": 121533060, + "step": 962 + }, + { + "epoch": 0.24677740011543642, + "loss": 1.558396816253662, + "loss_ce": 0.009080387651920319, + "loss_iou": 0.66796875, + "loss_num": 0.042236328125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 121533060, + "step": 962 + }, + { + "epoch": 0.24703392547938177, + "grad_norm": 57.97288513183594, + "learning_rate": 5e-06, + "loss": 1.29, + "num_input_tokens_seen": 121659272, + "step": 963 + }, + { + "epoch": 0.24703392547938177, + "loss": 1.2188966274261475, + "loss_ce": 0.001123210065998137, + "loss_iou": 0.5625, + "loss_num": 0.01806640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 121659272, + "step": 963 + }, + { + "epoch": 0.24729045084332713, + "grad_norm": 33.57471466064453, + "learning_rate": 5e-06, + "loss": 1.3, + "num_input_tokens_seen": 121784468, + "step": 964 + }, + { + "epoch": 0.24729045084332713, + "loss": 1.1365251541137695, + "loss_ce": 0.0007830787217244506, + "loss_iou": 0.5234375, + "loss_num": 0.017822265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 121784468, + "step": 964 + }, + { + "epoch": 0.2475469762072725, + "grad_norm": 57.19917678833008, + "learning_rate": 5e-06, + "loss": 1.3693, + "num_input_tokens_seen": 121910648, + "step": 965 + }, + { + "epoch": 0.2475469762072725, + "loss": 1.361912488937378, + "loss_ce": 0.002049215603619814, + "loss_iou": 0.625, + "loss_num": 0.021240234375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 121910648, + "step": 965 + }, + { + "epoch": 0.24780350157121786, + "grad_norm": 56.39918518066406, + "learning_rate": 5e-06, + "loss": 1.2543, + "num_input_tokens_seen": 122035936, + "step": 966 + }, + { + "epoch": 0.24780350157121786, + "loss": 1.2416245937347412, + "loss_ce": 0.001390199875459075, + "loss_iou": 0.5625, + "loss_num": 0.0225830078125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 122035936, + "step": 966 + }, + { + "epoch": 0.2480600269351632, + "grad_norm": 74.14239501953125, + "learning_rate": 5e-06, + "loss": 1.2815, + "num_input_tokens_seen": 122162752, + "step": 967 + }, + { + "epoch": 0.2480600269351632, + "loss": 1.2067949771881104, + "loss_ce": 0.000740223447792232, + "loss_iou": 0.5703125, + "loss_num": 0.01348876953125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 122162752, + "step": 967 + }, + { + "epoch": 0.24831655229910857, + "grad_norm": 53.837646484375, + "learning_rate": 5e-06, + "loss": 1.4405, + "num_input_tokens_seen": 122289332, + "step": 968 + }, + { + "epoch": 0.24831655229910857, + "loss": 1.4458200931549072, + "loss_ce": 0.0014841724187135696, + "loss_iou": 0.66796875, + "loss_num": 0.021484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 122289332, + "step": 968 + }, + { + "epoch": 0.24857307766305392, + "grad_norm": 27.323348999023438, + "learning_rate": 5e-06, + "loss": 1.2665, + "num_input_tokens_seen": 122415016, + "step": 969 + }, + { + "epoch": 0.24857307766305392, + "loss": 1.098299503326416, + "loss_ce": 0.0011315593728795648, + "loss_iou": 0.5, + "loss_num": 0.0196533203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 122415016, + "step": 969 + }, + { + "epoch": 0.2488296030269993, + "grad_norm": 44.42942810058594, + "learning_rate": 5e-06, + "loss": 1.1851, + "num_input_tokens_seen": 122541628, + "step": 970 + }, + { + "epoch": 0.2488296030269993, + "loss": 1.0224496126174927, + "loss_ce": 0.0019417600706219673, + "loss_iou": 0.48828125, + "loss_num": 0.008544921875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 122541628, + "step": 970 + }, + { + "epoch": 0.24908612839094466, + "grad_norm": 39.173648834228516, + "learning_rate": 5e-06, + "loss": 1.4571, + "num_input_tokens_seen": 122668516, + "step": 971 + }, + { + "epoch": 0.24908612839094466, + "loss": 1.3417680263519287, + "loss_ce": 0.00241252314299345, + "loss_iou": 0.60546875, + "loss_num": 0.025634765625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 122668516, + "step": 971 + }, + { + "epoch": 0.24934265375489, + "grad_norm": 50.44733810424805, + "learning_rate": 5e-06, + "loss": 1.1845, + "num_input_tokens_seen": 122793384, + "step": 972 + }, + { + "epoch": 0.24934265375489, + "loss": 1.4358099699020386, + "loss_ce": 0.002216234803199768, + "loss_iou": 0.63671875, + "loss_num": 0.0322265625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 122793384, + "step": 972 + }, + { + "epoch": 0.24959917911883536, + "grad_norm": 99.62310028076172, + "learning_rate": 5e-06, + "loss": 1.4368, + "num_input_tokens_seen": 122921236, + "step": 973 + }, + { + "epoch": 0.24959917911883536, + "loss": 1.345449447631836, + "loss_ce": 0.0016994993202388287, + "loss_iou": 0.63671875, + "loss_num": 0.01458740234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 122921236, + "step": 973 + }, + { + "epoch": 0.24985570448278074, + "grad_norm": 44.25408935546875, + "learning_rate": 5e-06, + "loss": 1.39, + "num_input_tokens_seen": 123046860, + "step": 974 + }, + { + "epoch": 0.24985570448278074, + "loss": 1.536024808883667, + "loss_ce": 0.001845052931457758, + "loss_iou": 0.7109375, + "loss_num": 0.0234375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 123046860, + "step": 974 + }, + { + "epoch": 0.2501122298467261, + "grad_norm": 27.052757263183594, + "learning_rate": 5e-06, + "loss": 1.3125, + "num_input_tokens_seen": 123172024, + "step": 975 + }, + { + "epoch": 0.2501122298467261, + "loss": 1.5058351755142212, + "loss_ce": 0.0019289260962978005, + "loss_iou": 0.68359375, + "loss_num": 0.0284423828125, + "loss_xval": 1.5, + "num_input_tokens_seen": 123172024, + "step": 975 + }, + { + "epoch": 0.2503687552106715, + "grad_norm": 27.093488693237305, + "learning_rate": 5e-06, + "loss": 1.3169, + "num_input_tokens_seen": 123298276, + "step": 976 + }, + { + "epoch": 0.2503687552106715, + "loss": 1.2444758415222168, + "loss_ce": 0.0003352178609929979, + "loss_iou": 0.57421875, + "loss_num": 0.018798828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 123298276, + "step": 976 + }, + { + "epoch": 0.2506252805746168, + "grad_norm": 25.7451229095459, + "learning_rate": 5e-06, + "loss": 1.2262, + "num_input_tokens_seen": 123424800, + "step": 977 + }, + { + "epoch": 0.2506252805746168, + "loss": 1.1199061870574951, + "loss_ce": 0.004671868868172169, + "loss_iou": 0.5, + "loss_num": 0.023193359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 123424800, + "step": 977 + }, + { + "epoch": 0.2508818059385622, + "grad_norm": 32.192726135253906, + "learning_rate": 5e-06, + "loss": 1.3083, + "num_input_tokens_seen": 123552064, + "step": 978 + }, + { + "epoch": 0.2508818059385622, + "loss": 1.3389650583267212, + "loss_ce": 0.0010744095779955387, + "loss_iou": 0.609375, + "loss_num": 0.024658203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 123552064, + "step": 978 + }, + { + "epoch": 0.2511383313025075, + "grad_norm": 79.4610824584961, + "learning_rate": 5e-06, + "loss": 1.3519, + "num_input_tokens_seen": 123678808, + "step": 979 + }, + { + "epoch": 0.2511383313025075, + "loss": 1.282730221748352, + "loss_ce": 0.004409873858094215, + "loss_iou": 0.57421875, + "loss_num": 0.0257568359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 123678808, + "step": 979 + }, + { + "epoch": 0.2513948566664529, + "grad_norm": 125.48126983642578, + "learning_rate": 5e-06, + "loss": 1.446, + "num_input_tokens_seen": 123805344, + "step": 980 + }, + { + "epoch": 0.2513948566664529, + "loss": 1.6013734340667725, + "loss_ce": 0.0007874965085648, + "loss_iou": 0.7265625, + "loss_num": 0.029296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 123805344, + "step": 980 + }, + { + "epoch": 0.2516513820303983, + "grad_norm": 40.6495246887207, + "learning_rate": 5e-06, + "loss": 1.2314, + "num_input_tokens_seen": 123931912, + "step": 981 + }, + { + "epoch": 0.2516513820303983, + "loss": 1.3292638063430786, + "loss_ce": 0.0050450400449335575, + "loss_iou": 0.59765625, + "loss_num": 0.0260009765625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 123931912, + "step": 981 + }, + { + "epoch": 0.2519079073943436, + "grad_norm": 66.79524230957031, + "learning_rate": 5e-06, + "loss": 1.1997, + "num_input_tokens_seen": 124057364, + "step": 982 + }, + { + "epoch": 0.2519079073943436, + "loss": 1.376494288444519, + "loss_ce": 0.000517748761922121, + "loss_iou": 0.640625, + "loss_num": 0.0196533203125, + "loss_xval": 1.375, + "num_input_tokens_seen": 124057364, + "step": 982 + }, + { + "epoch": 0.252164432758289, + "grad_norm": 49.750221252441406, + "learning_rate": 5e-06, + "loss": 1.3311, + "num_input_tokens_seen": 124184436, + "step": 983 + }, + { + "epoch": 0.252164432758289, + "loss": 1.2988307476043701, + "loss_ce": 0.0009791739284992218, + "loss_iou": 0.60546875, + "loss_num": 0.0181884765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 124184436, + "step": 983 + }, + { + "epoch": 0.25242095812223436, + "grad_norm": 53.73994064331055, + "learning_rate": 5e-06, + "loss": 1.1984, + "num_input_tokens_seen": 124311700, + "step": 984 + }, + { + "epoch": 0.25242095812223436, + "loss": 1.344632625579834, + "loss_ce": 0.0033241058699786663, + "loss_iou": 0.61328125, + "loss_num": 0.022705078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 124311700, + "step": 984 + }, + { + "epoch": 0.2526774834861797, + "grad_norm": 69.63384246826172, + "learning_rate": 5e-06, + "loss": 1.3606, + "num_input_tokens_seen": 124438252, + "step": 985 + }, + { + "epoch": 0.2526774834861797, + "loss": 1.2934606075286865, + "loss_ce": 0.0004919215571135283, + "loss_iou": 0.60546875, + "loss_num": 0.01611328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 124438252, + "step": 985 + }, + { + "epoch": 0.25293400885012507, + "grad_norm": 86.85189056396484, + "learning_rate": 5e-06, + "loss": 1.3455, + "num_input_tokens_seen": 124564672, + "step": 986 + }, + { + "epoch": 0.25293400885012507, + "loss": 1.2548818588256836, + "loss_ce": 0.0024404507130384445, + "loss_iou": 0.58203125, + "loss_num": 0.017578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 124564672, + "step": 986 + }, + { + "epoch": 0.2531905342140704, + "grad_norm": 49.68449401855469, + "learning_rate": 5e-06, + "loss": 1.4144, + "num_input_tokens_seen": 124690052, + "step": 987 + }, + { + "epoch": 0.2531905342140704, + "loss": 1.5087974071502686, + "loss_ce": 0.0004966843407601118, + "loss_iou": 0.703125, + "loss_num": 0.0206298828125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 124690052, + "step": 987 + }, + { + "epoch": 0.2534470595780158, + "grad_norm": 34.75070571899414, + "learning_rate": 5e-06, + "loss": 1.1187, + "num_input_tokens_seen": 124816300, + "step": 988 + }, + { + "epoch": 0.2534470595780158, + "loss": 1.171462893486023, + "loss_ce": 0.0015410316409543157, + "loss_iou": 0.546875, + "loss_num": 0.01544189453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 124816300, + "step": 988 + }, + { + "epoch": 0.25370358494196116, + "grad_norm": 39.807132720947266, + "learning_rate": 5e-06, + "loss": 1.2715, + "num_input_tokens_seen": 124942248, + "step": 989 + }, + { + "epoch": 0.25370358494196116, + "loss": 1.301980972290039, + "loss_ce": 0.001199742779135704, + "loss_iou": 0.58203125, + "loss_num": 0.027099609375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 124942248, + "step": 989 + }, + { + "epoch": 0.2539601103059065, + "grad_norm": 94.57109832763672, + "learning_rate": 5e-06, + "loss": 1.3081, + "num_input_tokens_seen": 125068868, + "step": 990 + }, + { + "epoch": 0.2539601103059065, + "loss": 1.2548375129699707, + "loss_ce": 0.00434919586405158, + "loss_iou": 0.58203125, + "loss_num": 0.0169677734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 125068868, + "step": 990 + }, + { + "epoch": 0.25421663566985186, + "grad_norm": 52.12382125854492, + "learning_rate": 5e-06, + "loss": 1.6295, + "num_input_tokens_seen": 125196324, + "step": 991 + }, + { + "epoch": 0.25421663566985186, + "loss": 1.6332857608795166, + "loss_ce": 0.0029146838933229446, + "loss_iou": 0.71484375, + "loss_num": 0.0400390625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 125196324, + "step": 991 + }, + { + "epoch": 0.2544731610337972, + "grad_norm": 46.503604888916016, + "learning_rate": 5e-06, + "loss": 1.2065, + "num_input_tokens_seen": 125323060, + "step": 992 + }, + { + "epoch": 0.2544731610337972, + "loss": 1.2035973072052002, + "loss_ce": 0.0009604988154023886, + "loss_iou": 0.5625, + "loss_num": 0.0157470703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 125323060, + "step": 992 + }, + { + "epoch": 0.25472968639774257, + "grad_norm": 97.95872497558594, + "learning_rate": 5e-06, + "loss": 1.2779, + "num_input_tokens_seen": 125450492, + "step": 993 + }, + { + "epoch": 0.25472968639774257, + "loss": 1.2648959159851074, + "loss_ce": 0.0012240895302966237, + "loss_iou": 0.5859375, + "loss_num": 0.017578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 125450492, + "step": 993 + }, + { + "epoch": 0.25498621176168795, + "grad_norm": 51.53623580932617, + "learning_rate": 5e-06, + "loss": 1.5197, + "num_input_tokens_seen": 125576772, + "step": 994 + }, + { + "epoch": 0.25498621176168795, + "loss": 1.4663052558898926, + "loss_ce": 0.003414657199755311, + "loss_iou": 0.65234375, + "loss_num": 0.031982421875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 125576772, + "step": 994 + }, + { + "epoch": 0.2552427371256333, + "grad_norm": 51.15315628051758, + "learning_rate": 5e-06, + "loss": 1.2115, + "num_input_tokens_seen": 125704808, + "step": 995 + }, + { + "epoch": 0.2552427371256333, + "loss": 1.2527072429656982, + "loss_ce": 0.0027072362136095762, + "loss_iou": 0.5625, + "loss_num": 0.024658203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 125704808, + "step": 995 + }, + { + "epoch": 0.25549926248957866, + "grad_norm": 62.99626541137695, + "learning_rate": 5e-06, + "loss": 1.2732, + "num_input_tokens_seen": 125831036, + "step": 996 + }, + { + "epoch": 0.25549926248957866, + "loss": 1.3592969179153442, + "loss_ce": 0.0008985073654912412, + "loss_iou": 0.61328125, + "loss_num": 0.0263671875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 125831036, + "step": 996 + }, + { + "epoch": 0.25575578785352404, + "grad_norm": 58.747352600097656, + "learning_rate": 5e-06, + "loss": 1.3329, + "num_input_tokens_seen": 125956336, + "step": 997 + }, + { + "epoch": 0.25575578785352404, + "loss": 1.351613998413086, + "loss_ce": 0.0005398673238232732, + "loss_iou": 0.6328125, + "loss_num": 0.01708984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 125956336, + "step": 997 + }, + { + "epoch": 0.25601231321746937, + "grad_norm": 73.60084533691406, + "learning_rate": 5e-06, + "loss": 1.2211, + "num_input_tokens_seen": 126083664, + "step": 998 + }, + { + "epoch": 0.25601231321746937, + "loss": 1.1863772869110107, + "loss_ce": 0.0022953087463974953, + "loss_iou": 0.5625, + "loss_num": 0.01226806640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 126083664, + "step": 998 + }, + { + "epoch": 0.25626883858141475, + "grad_norm": 65.99295043945312, + "learning_rate": 5e-06, + "loss": 1.3937, + "num_input_tokens_seen": 126210400, + "step": 999 + }, + { + "epoch": 0.25626883858141475, + "loss": 1.3085553646087646, + "loss_ce": 0.0019147041020914912, + "loss_iou": 0.60546875, + "loss_num": 0.019775390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 126210400, + "step": 999 + }, + { + "epoch": 0.2565253639453601, + "grad_norm": 63.51201248168945, + "learning_rate": 5e-06, + "loss": 1.1075, + "num_input_tokens_seen": 126337192, + "step": 1000 + }, + { + "epoch": 0.2565253639453601, + "eval_icons_CIoU": 0.032705364137655124, + "eval_icons_GIoU": 0.00907462788745761, + "eval_icons_IoU": 0.20075911283493042, + "eval_icons_MAE_all": 0.04811485670506954, + "eval_icons_MAE_h": 0.06917825900018215, + "eval_icons_MAE_w": 0.05737338587641716, + "eval_icons_MAE_x_boxes": 0.05596754886209965, + "eval_icons_MAE_y_boxes": 0.06130226328969002, + "eval_icons_NUM_probability": 0.9998932778835297, + "eval_icons_inside_bbox": 0.4444444477558136, + "eval_icons_loss": 2.1861820220947266, + "eval_icons_loss_ce": 0.0005034492714912631, + "eval_icons_loss_iou": 0.982177734375, + "eval_icons_loss_num": 0.05434417724609375, + "eval_icons_loss_xval": 2.236328125, + "eval_icons_runtime": 40.0599, + "eval_icons_samples_per_second": 1.248, + "eval_icons_steps_per_second": 0.05, + "num_input_tokens_seen": 126337192, + "step": 1000 + }, + { + "epoch": 0.2565253639453601, + "eval_screenspot_CIoU": 0.055625010281801224, + "eval_screenspot_GIoU": 0.03979986781875292, + "eval_screenspot_IoU": 0.23322050273418427, + "eval_screenspot_MAE_all": 0.09227462112903595, + "eval_screenspot_MAE_h": 0.08019755035638809, + "eval_screenspot_MAE_w": 0.14348072310288748, + "eval_screenspot_MAE_x_boxes": 0.12140942613283794, + "eval_screenspot_MAE_y_boxes": 0.060147623221079506, + "eval_screenspot_NUM_probability": 0.9997183680534363, + "eval_screenspot_inside_bbox": 0.5808333357175192, + "eval_screenspot_loss": 2.426804780960083, + "eval_screenspot_loss_ce": 0.004263625790675481, + "eval_screenspot_loss_iou": 0.9866536458333334, + "eval_screenspot_loss_num": 0.099456787109375, + "eval_screenspot_loss_xval": 2.4697265625, + "eval_screenspot_runtime": 69.7969, + "eval_screenspot_samples_per_second": 1.275, + "eval_screenspot_steps_per_second": 0.043, + "num_input_tokens_seen": 126337192, + "step": 1000 + }, + { + "epoch": 0.2565253639453601, + "loss": 2.402255058288574, + "loss_ce": 0.004794216249138117, + "loss_iou": 0.96484375, + "loss_num": 0.09375, + "loss_xval": 2.390625, + "num_input_tokens_seen": 126337192, + "step": 1000 + }, + { + "epoch": 0.25678188930930546, + "grad_norm": 55.844486236572266, + "learning_rate": 5e-06, + "loss": 1.2759, + "num_input_tokens_seen": 126464160, + "step": 1001 + }, + { + "epoch": 0.25678188930930546, + "loss": 1.3171335458755493, + "loss_ce": 0.0012154907453805208, + "loss_iou": 0.6171875, + "loss_num": 0.0162353515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 126464160, + "step": 1001 + }, + { + "epoch": 0.25703841467325084, + "grad_norm": 45.67885208129883, + "learning_rate": 5e-06, + "loss": 1.2447, + "num_input_tokens_seen": 126591800, + "step": 1002 + }, + { + "epoch": 0.25703841467325084, + "loss": 1.1347100734710693, + "loss_ce": 0.003850785316899419, + "loss_iou": 0.53125, + "loss_num": 0.01373291015625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 126591800, + "step": 1002 + }, + { + "epoch": 0.25729494003719616, + "grad_norm": 57.108516693115234, + "learning_rate": 5e-06, + "loss": 1.2229, + "num_input_tokens_seen": 126717780, + "step": 1003 + }, + { + "epoch": 0.25729494003719616, + "loss": 1.2947362661361694, + "loss_ce": 0.0022558211348950863, + "loss_iou": 0.6015625, + "loss_num": 0.0186767578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 126717780, + "step": 1003 + }, + { + "epoch": 0.25755146540114154, + "grad_norm": 72.79289245605469, + "learning_rate": 5e-06, + "loss": 1.3143, + "num_input_tokens_seen": 126843928, + "step": 1004 + }, + { + "epoch": 0.25755146540114154, + "loss": 1.1645861864089966, + "loss_ce": 0.0029651043005287647, + "loss_iou": 0.53125, + "loss_num": 0.0198974609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 126843928, + "step": 1004 + }, + { + "epoch": 0.2578079907650869, + "grad_norm": 48.55360412597656, + "learning_rate": 5e-06, + "loss": 1.2307, + "num_input_tokens_seen": 126969760, + "step": 1005 + }, + { + "epoch": 0.2578079907650869, + "loss": 1.1613202095031738, + "loss_ce": 0.001652223989367485, + "loss_iou": 0.546875, + "loss_num": 0.0135498046875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 126969760, + "step": 1005 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 39.73745346069336, + "learning_rate": 5e-06, + "loss": 1.2469, + "num_input_tokens_seen": 127095808, + "step": 1006 + }, + { + "epoch": 0.25806451612903225, + "loss": 1.1495115756988525, + "loss_ce": 0.004980337806046009, + "loss_iou": 0.53125, + "loss_num": 0.01544189453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 127095808, + "step": 1006 + }, + { + "epoch": 0.25832104149297763, + "grad_norm": 61.037940979003906, + "learning_rate": 5e-06, + "loss": 1.2476, + "num_input_tokens_seen": 127222216, + "step": 1007 + }, + { + "epoch": 0.25832104149297763, + "loss": 1.2334895133972168, + "loss_ce": 0.0020442106761038303, + "loss_iou": 0.5625, + "loss_num": 0.021240234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 127222216, + "step": 1007 + }, + { + "epoch": 0.25857756685692296, + "grad_norm": 100.94872283935547, + "learning_rate": 5e-06, + "loss": 1.3536, + "num_input_tokens_seen": 127347404, + "step": 1008 + }, + { + "epoch": 0.25857756685692296, + "loss": 1.270337700843811, + "loss_ce": 0.0008063868153840303, + "loss_iou": 0.6015625, + "loss_num": 0.01287841796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 127347404, + "step": 1008 + }, + { + "epoch": 0.25883409222086834, + "grad_norm": 47.56733322143555, + "learning_rate": 5e-06, + "loss": 1.4476, + "num_input_tokens_seen": 127474420, + "step": 1009 + }, + { + "epoch": 0.25883409222086834, + "loss": 1.4321798086166382, + "loss_ce": 0.00444543082267046, + "loss_iou": 0.65234375, + "loss_num": 0.024169921875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 127474420, + "step": 1009 + }, + { + "epoch": 0.2590906175848137, + "grad_norm": 43.912471771240234, + "learning_rate": 5e-06, + "loss": 1.3266, + "num_input_tokens_seen": 127599912, + "step": 1010 + }, + { + "epoch": 0.2590906175848137, + "loss": 1.28582763671875, + "loss_ce": 0.0006713857874274254, + "loss_iou": 0.58984375, + "loss_num": 0.02099609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 127599912, + "step": 1010 + }, + { + "epoch": 0.25934714294875905, + "grad_norm": 66.23969268798828, + "learning_rate": 5e-06, + "loss": 1.263, + "num_input_tokens_seen": 127725616, + "step": 1011 + }, + { + "epoch": 0.25934714294875905, + "loss": 1.6872084140777588, + "loss_ce": 0.005567763466387987, + "loss_iou": 0.75, + "loss_num": 0.035888671875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 127725616, + "step": 1011 + }, + { + "epoch": 0.2596036683127044, + "grad_norm": 101.87751007080078, + "learning_rate": 5e-06, + "loss": 1.341, + "num_input_tokens_seen": 127852428, + "step": 1012 + }, + { + "epoch": 0.2596036683127044, + "loss": 1.2209150791168213, + "loss_ce": 0.0011885600397363305, + "loss_iou": 0.578125, + "loss_num": 0.01263427734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 127852428, + "step": 1012 + }, + { + "epoch": 0.25986019367664975, + "grad_norm": 62.691001892089844, + "learning_rate": 5e-06, + "loss": 1.1843, + "num_input_tokens_seen": 127978196, + "step": 1013 + }, + { + "epoch": 0.25986019367664975, + "loss": 1.383500337600708, + "loss_ce": 0.0011762167559936643, + "loss_iou": 0.6484375, + "loss_num": 0.0177001953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 127978196, + "step": 1013 + }, + { + "epoch": 0.26011671904059513, + "grad_norm": 60.69166946411133, + "learning_rate": 5e-06, + "loss": 1.283, + "num_input_tokens_seen": 128105336, + "step": 1014 + }, + { + "epoch": 0.26011671904059513, + "loss": 1.2083415985107422, + "loss_ce": 0.0022868672385811806, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 128105336, + "step": 1014 + }, + { + "epoch": 0.2603732444045405, + "grad_norm": 50.688777923583984, + "learning_rate": 5e-06, + "loss": 1.1484, + "num_input_tokens_seen": 128232736, + "step": 1015 + }, + { + "epoch": 0.2603732444045405, + "loss": 1.2321969270706177, + "loss_ce": 0.0017282064072787762, + "loss_iou": 0.578125, + "loss_num": 0.0146484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 128232736, + "step": 1015 + }, + { + "epoch": 0.26062976976848584, + "grad_norm": 53.34419250488281, + "learning_rate": 5e-06, + "loss": 1.4511, + "num_input_tokens_seen": 128358808, + "step": 1016 + }, + { + "epoch": 0.26062976976848584, + "loss": 1.482530117034912, + "loss_ce": 0.002549730706959963, + "loss_iou": 0.6640625, + "loss_num": 0.030517578125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 128358808, + "step": 1016 + }, + { + "epoch": 0.2608862951324312, + "grad_norm": 90.92627716064453, + "learning_rate": 5e-06, + "loss": 1.2111, + "num_input_tokens_seen": 128486032, + "step": 1017 + }, + { + "epoch": 0.2608862951324312, + "loss": 1.52119779586792, + "loss_ce": 0.0016665400471538305, + "loss_iou": 0.69140625, + "loss_num": 0.027099609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 128486032, + "step": 1017 + }, + { + "epoch": 0.2611428204963766, + "grad_norm": 48.63761901855469, + "learning_rate": 5e-06, + "loss": 1.4479, + "num_input_tokens_seen": 128612872, + "step": 1018 + }, + { + "epoch": 0.2611428204963766, + "loss": 1.4284875392913818, + "loss_ce": 0.0027062473818659782, + "loss_iou": 0.66015625, + "loss_num": 0.0205078125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 128612872, + "step": 1018 + }, + { + "epoch": 0.26139934586032193, + "grad_norm": 42.32113265991211, + "learning_rate": 5e-06, + "loss": 1.224, + "num_input_tokens_seen": 128739220, + "step": 1019 + }, + { + "epoch": 0.26139934586032193, + "loss": 1.2114202976226807, + "loss_ce": 0.000971091038081795, + "loss_iou": 0.5546875, + "loss_num": 0.0208740234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 128739220, + "step": 1019 + }, + { + "epoch": 0.2616558712242673, + "grad_norm": 40.7111930847168, + "learning_rate": 5e-06, + "loss": 1.1924, + "num_input_tokens_seen": 128865440, + "step": 1020 + }, + { + "epoch": 0.2616558712242673, + "loss": 1.2764580249786377, + "loss_ce": 0.00155562290456146, + "loss_iou": 0.5859375, + "loss_num": 0.021484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 128865440, + "step": 1020 + }, + { + "epoch": 0.26191239658821264, + "grad_norm": 56.98155975341797, + "learning_rate": 5e-06, + "loss": 1.2776, + "num_input_tokens_seen": 128992080, + "step": 1021 + }, + { + "epoch": 0.26191239658821264, + "loss": 1.4043059349060059, + "loss_ce": 0.00342700257897377, + "loss_iou": 0.62890625, + "loss_num": 0.028076171875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 128992080, + "step": 1021 + }, + { + "epoch": 0.262168921952158, + "grad_norm": 105.88673400878906, + "learning_rate": 5e-06, + "loss": 1.221, + "num_input_tokens_seen": 129118688, + "step": 1022 + }, + { + "epoch": 0.262168921952158, + "loss": 1.2894915342330933, + "loss_ce": 0.00042905122973024845, + "loss_iou": 0.60546875, + "loss_num": 0.0159912109375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 129118688, + "step": 1022 + }, + { + "epoch": 0.2624254473161034, + "grad_norm": 49.67167663574219, + "learning_rate": 5e-06, + "loss": 1.4915, + "num_input_tokens_seen": 129244420, + "step": 1023 + }, + { + "epoch": 0.2624254473161034, + "loss": 1.45267653465271, + "loss_ce": 0.0015046806074678898, + "loss_iou": 0.6640625, + "loss_num": 0.0240478515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 129244420, + "step": 1023 + }, + { + "epoch": 0.2626819726800487, + "grad_norm": 41.57627487182617, + "learning_rate": 5e-06, + "loss": 1.2404, + "num_input_tokens_seen": 129371116, + "step": 1024 + }, + { + "epoch": 0.2626819726800487, + "loss": 1.3135251998901367, + "loss_ce": 0.003954818472266197, + "loss_iou": 0.6015625, + "loss_num": 0.0216064453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 129371116, + "step": 1024 + }, + { + "epoch": 0.2629384980439941, + "grad_norm": 31.635560989379883, + "learning_rate": 5e-06, + "loss": 1.2988, + "num_input_tokens_seen": 129496056, + "step": 1025 + }, + { + "epoch": 0.2629384980439941, + "loss": 1.3994765281677246, + "loss_ce": 0.0010389585513621569, + "loss_iou": 0.640625, + "loss_num": 0.02392578125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 129496056, + "step": 1025 + }, + { + "epoch": 0.26319502340793943, + "grad_norm": 47.00706100463867, + "learning_rate": 5e-06, + "loss": 1.2076, + "num_input_tokens_seen": 129622292, + "step": 1026 + }, + { + "epoch": 0.26319502340793943, + "loss": 1.229444146156311, + "loss_ce": 0.001416836166754365, + "loss_iou": 0.5703125, + "loss_num": 0.0167236328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 129622292, + "step": 1026 + }, + { + "epoch": 0.2634515487718848, + "grad_norm": 45.9785041809082, + "learning_rate": 5e-06, + "loss": 1.3464, + "num_input_tokens_seen": 129748956, + "step": 1027 + }, + { + "epoch": 0.2634515487718848, + "loss": 1.226797342300415, + "loss_ce": 0.0031646140851080418, + "loss_iou": 0.5703125, + "loss_num": 0.017333984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 129748956, + "step": 1027 + }, + { + "epoch": 0.2637080741358302, + "grad_norm": 31.860580444335938, + "learning_rate": 5e-06, + "loss": 1.2139, + "num_input_tokens_seen": 129874492, + "step": 1028 + }, + { + "epoch": 0.2637080741358302, + "loss": 1.2348079681396484, + "loss_ce": 0.0018977585714310408, + "loss_iou": 0.546875, + "loss_num": 0.02734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 129874492, + "step": 1028 + }, + { + "epoch": 0.2639645994997755, + "grad_norm": 33.097957611083984, + "learning_rate": 5e-06, + "loss": 1.2068, + "num_input_tokens_seen": 130002316, + "step": 1029 + }, + { + "epoch": 0.2639645994997755, + "loss": 1.255366325378418, + "loss_ce": 0.0009717341745272279, + "loss_iou": 0.57421875, + "loss_num": 0.021484375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 130002316, + "step": 1029 + }, + { + "epoch": 0.2642211248637209, + "grad_norm": 42.16763687133789, + "learning_rate": 5e-06, + "loss": 1.4607, + "num_input_tokens_seen": 130129460, + "step": 1030 + }, + { + "epoch": 0.2642211248637209, + "loss": 1.6399447917938232, + "loss_ce": 0.0012729273876175284, + "loss_iou": 0.73046875, + "loss_num": 0.03515625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 130129460, + "step": 1030 + }, + { + "epoch": 0.2644776502276663, + "grad_norm": 69.86314392089844, + "learning_rate": 5e-06, + "loss": 1.2289, + "num_input_tokens_seen": 130256732, + "step": 1031 + }, + { + "epoch": 0.2644776502276663, + "loss": 1.2357208728790283, + "loss_ce": 0.0008575776591897011, + "loss_iou": 0.58203125, + "loss_num": 0.0147705078125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 130256732, + "step": 1031 + }, + { + "epoch": 0.2647341755916116, + "grad_norm": 46.19670486450195, + "learning_rate": 5e-06, + "loss": 1.3814, + "num_input_tokens_seen": 130384096, + "step": 1032 + }, + { + "epoch": 0.2647341755916116, + "loss": 1.1509019136428833, + "loss_ce": 0.003440987318754196, + "loss_iou": 0.53125, + "loss_num": 0.0169677734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 130384096, + "step": 1032 + }, + { + "epoch": 0.264990700955557, + "grad_norm": 34.3622932434082, + "learning_rate": 5e-06, + "loss": 1.2134, + "num_input_tokens_seen": 130510592, + "step": 1033 + }, + { + "epoch": 0.264990700955557, + "loss": 1.2414436340332031, + "loss_ce": 0.003650777041912079, + "loss_iou": 0.56640625, + "loss_num": 0.0213623046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 130510592, + "step": 1033 + }, + { + "epoch": 0.2652472263195023, + "grad_norm": 60.838165283203125, + "learning_rate": 5e-06, + "loss": 1.2733, + "num_input_tokens_seen": 130637568, + "step": 1034 + }, + { + "epoch": 0.2652472263195023, + "loss": 1.2846837043762207, + "loss_ce": 0.0014805427053943276, + "loss_iou": 0.58203125, + "loss_num": 0.0233154296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 130637568, + "step": 1034 + }, + { + "epoch": 0.2655037516834477, + "grad_norm": 75.01587677001953, + "learning_rate": 5e-06, + "loss": 1.3325, + "num_input_tokens_seen": 130764788, + "step": 1035 + }, + { + "epoch": 0.2655037516834477, + "loss": 1.5811057090759277, + "loss_ce": 0.0015158930327743292, + "loss_iou": 0.7265625, + "loss_num": 0.0252685546875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 130764788, + "step": 1035 + }, + { + "epoch": 0.2657602770473931, + "grad_norm": 56.5657958984375, + "learning_rate": 5e-06, + "loss": 1.3726, + "num_input_tokens_seen": 130890324, + "step": 1036 + }, + { + "epoch": 0.2657602770473931, + "loss": 1.3378958702087402, + "loss_ce": 0.0029348793905228376, + "loss_iou": 0.62109375, + "loss_num": 0.01806640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 130890324, + "step": 1036 + }, + { + "epoch": 0.2660168024113384, + "grad_norm": 65.49407958984375, + "learning_rate": 5e-06, + "loss": 1.2119, + "num_input_tokens_seen": 131016512, + "step": 1037 + }, + { + "epoch": 0.2660168024113384, + "loss": 1.1522557735443115, + "loss_ce": 0.00235351687297225, + "loss_iou": 0.5234375, + "loss_num": 0.0211181640625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 131016512, + "step": 1037 + }, + { + "epoch": 0.2662733277752838, + "grad_norm": 60.1034049987793, + "learning_rate": 5e-06, + "loss": 1.3162, + "num_input_tokens_seen": 131142776, + "step": 1038 + }, + { + "epoch": 0.2662733277752838, + "loss": 1.3400213718414307, + "loss_ce": 0.0031073412392288446, + "loss_iou": 0.6171875, + "loss_num": 0.0196533203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 131142776, + "step": 1038 + }, + { + "epoch": 0.26652985313922917, + "grad_norm": 46.14387130737305, + "learning_rate": 5e-06, + "loss": 1.2992, + "num_input_tokens_seen": 131269484, + "step": 1039 + }, + { + "epoch": 0.26652985313922917, + "loss": 1.2884949445724487, + "loss_ce": 0.0004090270376764238, + "loss_iou": 0.5703125, + "loss_num": 0.02880859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 131269484, + "step": 1039 + }, + { + "epoch": 0.2667863785031745, + "grad_norm": 49.07207489013672, + "learning_rate": 5e-06, + "loss": 1.3384, + "num_input_tokens_seen": 131394924, + "step": 1040 + }, + { + "epoch": 0.2667863785031745, + "loss": 1.2980718612670898, + "loss_ce": 0.0051031168550252914, + "loss_iou": 0.58984375, + "loss_num": 0.022216796875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 131394924, + "step": 1040 + }, + { + "epoch": 0.2670429038671199, + "grad_norm": 69.40968322753906, + "learning_rate": 5e-06, + "loss": 1.2356, + "num_input_tokens_seen": 131521488, + "step": 1041 + }, + { + "epoch": 0.2670429038671199, + "loss": 1.3211698532104492, + "loss_ce": 0.0023221999872475863, + "loss_iou": 0.59375, + "loss_num": 0.02587890625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 131521488, + "step": 1041 + }, + { + "epoch": 0.2672994292310652, + "grad_norm": 62.381099700927734, + "learning_rate": 5e-06, + "loss": 1.4408, + "num_input_tokens_seen": 131647368, + "step": 1042 + }, + { + "epoch": 0.2672994292310652, + "loss": 1.3574647903442383, + "loss_ce": 0.0010194622445851564, + "loss_iou": 0.6328125, + "loss_num": 0.017578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 131647368, + "step": 1042 + }, + { + "epoch": 0.2675559545950106, + "grad_norm": 33.51759338378906, + "learning_rate": 5e-06, + "loss": 1.2418, + "num_input_tokens_seen": 131774324, + "step": 1043 + }, + { + "epoch": 0.2675559545950106, + "loss": 1.0886719226837158, + "loss_ce": 0.002246186137199402, + "loss_iou": 0.515625, + "loss_num": 0.010986328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 131774324, + "step": 1043 + }, + { + "epoch": 0.26781247995895596, + "grad_norm": 29.24509048461914, + "learning_rate": 5e-06, + "loss": 1.3292, + "num_input_tokens_seen": 131900236, + "step": 1044 + }, + { + "epoch": 0.26781247995895596, + "loss": 1.5134367942810059, + "loss_ce": 0.0036712924484163523, + "loss_iou": 0.6640625, + "loss_num": 0.0361328125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 131900236, + "step": 1044 + }, + { + "epoch": 0.2680690053229013, + "grad_norm": 30.52443504333496, + "learning_rate": 5e-06, + "loss": 1.264, + "num_input_tokens_seen": 132026948, + "step": 1045 + }, + { + "epoch": 0.2680690053229013, + "loss": 1.2478322982788086, + "loss_ce": 0.0007619146490469575, + "loss_iou": 0.5703125, + "loss_num": 0.020263671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 132026948, + "step": 1045 + }, + { + "epoch": 0.26832553068684667, + "grad_norm": 49.36771011352539, + "learning_rate": 5e-06, + "loss": 1.0959, + "num_input_tokens_seen": 132152808, + "step": 1046 + }, + { + "epoch": 0.26832553068684667, + "loss": 1.0932854413986206, + "loss_ce": 0.00148856732994318, + "loss_iou": 0.50390625, + "loss_num": 0.01708984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 132152808, + "step": 1046 + }, + { + "epoch": 0.268582056050792, + "grad_norm": 68.7695541381836, + "learning_rate": 5e-06, + "loss": 1.1856, + "num_input_tokens_seen": 132279328, + "step": 1047 + }, + { + "epoch": 0.268582056050792, + "loss": 1.1976566314697266, + "loss_ce": 0.0028323503211140633, + "loss_iou": 0.546875, + "loss_num": 0.02099609375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 132279328, + "step": 1047 + }, + { + "epoch": 0.2688385814147374, + "grad_norm": 48.63533401489258, + "learning_rate": 5e-06, + "loss": 1.2704, + "num_input_tokens_seen": 132405632, + "step": 1048 + }, + { + "epoch": 0.2688385814147374, + "loss": 1.372565507888794, + "loss_ce": 0.0019600000232458115, + "loss_iou": 0.62109375, + "loss_num": 0.026123046875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 132405632, + "step": 1048 + }, + { + "epoch": 0.26909510677868276, + "grad_norm": 39.54453659057617, + "learning_rate": 5e-06, + "loss": 1.2722, + "num_input_tokens_seen": 132531328, + "step": 1049 + }, + { + "epoch": 0.26909510677868276, + "loss": 1.2236113548278809, + "loss_ce": 0.00046687384019605815, + "loss_iou": 0.5703125, + "loss_num": 0.01708984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 132531328, + "step": 1049 + }, + { + "epoch": 0.2693516321426281, + "grad_norm": 86.25248718261719, + "learning_rate": 5e-06, + "loss": 1.1899, + "num_input_tokens_seen": 132657956, + "step": 1050 + }, + { + "epoch": 0.2693516321426281, + "loss": 1.4041413068771362, + "loss_ce": 0.0022858367301523685, + "loss_iou": 0.63671875, + "loss_num": 0.0263671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 132657956, + "step": 1050 + }, + { + "epoch": 0.26960815750657346, + "grad_norm": 51.75539779663086, + "learning_rate": 5e-06, + "loss": 1.3092, + "num_input_tokens_seen": 132784104, + "step": 1051 + }, + { + "epoch": 0.26960815750657346, + "loss": 1.2126885652542114, + "loss_ce": 0.00175106234382838, + "loss_iou": 0.5625, + "loss_num": 0.0164794921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 132784104, + "step": 1051 + }, + { + "epoch": 0.26986468287051885, + "grad_norm": 37.408103942871094, + "learning_rate": 5e-06, + "loss": 1.2988, + "num_input_tokens_seen": 132910352, + "step": 1052 + }, + { + "epoch": 0.26986468287051885, + "loss": 1.1763677597045898, + "loss_ce": 0.0025397029239684343, + "loss_iou": 0.54296875, + "loss_num": 0.01806640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 132910352, + "step": 1052 + }, + { + "epoch": 0.27012120823446417, + "grad_norm": 62.1556282043457, + "learning_rate": 5e-06, + "loss": 1.2572, + "num_input_tokens_seen": 133035072, + "step": 1053 + }, + { + "epoch": 0.27012120823446417, + "loss": 1.4323375225067139, + "loss_ce": 0.0021617719903588295, + "loss_iou": 0.65234375, + "loss_num": 0.0247802734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 133035072, + "step": 1053 + }, + { + "epoch": 0.27037773359840955, + "grad_norm": 45.63850402832031, + "learning_rate": 5e-06, + "loss": 1.3859, + "num_input_tokens_seen": 133160464, + "step": 1054 + }, + { + "epoch": 0.27037773359840955, + "loss": 1.195845365524292, + "loss_ce": 0.0015095099806785583, + "loss_iou": 0.5625, + "loss_num": 0.013671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 133160464, + "step": 1054 + }, + { + "epoch": 0.2706342589623549, + "grad_norm": 41.18147659301758, + "learning_rate": 5e-06, + "loss": 1.2209, + "num_input_tokens_seen": 133288020, + "step": 1055 + }, + { + "epoch": 0.2706342589623549, + "loss": 1.3176836967468262, + "loss_ce": 0.0032306499779224396, + "loss_iou": 0.58984375, + "loss_num": 0.02734375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 133288020, + "step": 1055 + }, + { + "epoch": 0.27089078432630026, + "grad_norm": 50.79869842529297, + "learning_rate": 5e-06, + "loss": 1.1388, + "num_input_tokens_seen": 133413576, + "step": 1056 + }, + { + "epoch": 0.27089078432630026, + "loss": 0.915912389755249, + "loss_ce": 0.004291311837732792, + "loss_iou": 0.43359375, + "loss_num": 0.00897216796875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 133413576, + "step": 1056 + }, + { + "epoch": 0.27114730969024564, + "grad_norm": 96.04622650146484, + "learning_rate": 5e-06, + "loss": 1.2823, + "num_input_tokens_seen": 133540000, + "step": 1057 + }, + { + "epoch": 0.27114730969024564, + "loss": 1.3846828937530518, + "loss_ce": 0.000893853313755244, + "loss_iou": 0.640625, + "loss_num": 0.020751953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 133540000, + "step": 1057 + }, + { + "epoch": 0.27140383505419097, + "grad_norm": 51.8017692565918, + "learning_rate": 5e-06, + "loss": 1.4073, + "num_input_tokens_seen": 133666436, + "step": 1058 + }, + { + "epoch": 0.27140383505419097, + "loss": 1.3004405498504639, + "loss_ce": 0.0016124380053952336, + "loss_iou": 0.6015625, + "loss_num": 0.01904296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 133666436, + "step": 1058 + }, + { + "epoch": 0.27166036041813635, + "grad_norm": 97.17811584472656, + "learning_rate": 5e-06, + "loss": 1.3045, + "num_input_tokens_seen": 133794124, + "step": 1059 + }, + { + "epoch": 0.27166036041813635, + "loss": 1.475234866142273, + "loss_ce": 0.0055083176121115685, + "loss_iou": 0.65625, + "loss_num": 0.031982421875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 133794124, + "step": 1059 + }, + { + "epoch": 0.27191688578208173, + "grad_norm": 42.714324951171875, + "learning_rate": 5e-06, + "loss": 1.3787, + "num_input_tokens_seen": 133919596, + "step": 1060 + }, + { + "epoch": 0.27191688578208173, + "loss": 1.4620192050933838, + "loss_ce": 0.006941142957657576, + "loss_iou": 0.62109375, + "loss_num": 0.041748046875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 133919596, + "step": 1060 + }, + { + "epoch": 0.27217341114602706, + "grad_norm": 55.97426223754883, + "learning_rate": 5e-06, + "loss": 1.1557, + "num_input_tokens_seen": 134046724, + "step": 1061 + }, + { + "epoch": 0.27217341114602706, + "loss": 1.1047406196594238, + "loss_ce": 0.000736708811018616, + "loss_iou": 0.515625, + "loss_num": 0.01397705078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 134046724, + "step": 1061 + }, + { + "epoch": 0.27242993650997244, + "grad_norm": 83.4716567993164, + "learning_rate": 5e-06, + "loss": 1.2447, + "num_input_tokens_seen": 134172652, + "step": 1062 + }, + { + "epoch": 0.27242993650997244, + "loss": 1.1544345617294312, + "loss_ce": 0.0011141583090648055, + "loss_iou": 0.5546875, + "loss_num": 0.0084228515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 134172652, + "step": 1062 + }, + { + "epoch": 0.27268646187391776, + "grad_norm": 59.71430206298828, + "learning_rate": 5e-06, + "loss": 1.4993, + "num_input_tokens_seen": 134298568, + "step": 1063 + }, + { + "epoch": 0.27268646187391776, + "loss": 1.4713773727416992, + "loss_ce": 0.0016507022082805634, + "loss_iou": 0.671875, + "loss_num": 0.0245361328125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 134298568, + "step": 1063 + }, + { + "epoch": 0.27294298723786314, + "grad_norm": 37.27223587036133, + "learning_rate": 5e-06, + "loss": 1.1524, + "num_input_tokens_seen": 134425084, + "step": 1064 + }, + { + "epoch": 0.27294298723786314, + "loss": 1.1787223815917969, + "loss_ce": 0.003917615860700607, + "loss_iou": 0.53515625, + "loss_num": 0.021484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 134425084, + "step": 1064 + }, + { + "epoch": 0.2731995126018085, + "grad_norm": 309.0400085449219, + "learning_rate": 5e-06, + "loss": 1.2624, + "num_input_tokens_seen": 134551044, + "step": 1065 + }, + { + "epoch": 0.2731995126018085, + "loss": 1.3719955682754517, + "loss_ce": 0.00334329716861248, + "loss_iou": 0.6015625, + "loss_num": 0.03271484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 134551044, + "step": 1065 + }, + { + "epoch": 0.27345603796575385, + "grad_norm": 45.84564971923828, + "learning_rate": 5e-06, + "loss": 1.3777, + "num_input_tokens_seen": 134677040, + "step": 1066 + }, + { + "epoch": 0.27345603796575385, + "loss": 1.4574038982391357, + "loss_ce": 0.0033023571595549583, + "loss_iou": 0.65625, + "loss_num": 0.0286865234375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 134677040, + "step": 1066 + }, + { + "epoch": 0.27371256332969923, + "grad_norm": 33.84761047363281, + "learning_rate": 5e-06, + "loss": 1.2885, + "num_input_tokens_seen": 134803988, + "step": 1067 + }, + { + "epoch": 0.27371256332969923, + "loss": 1.4272708892822266, + "loss_ce": 0.0034428443759679794, + "loss_iou": 0.64453125, + "loss_num": 0.0272216796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 134803988, + "step": 1067 + }, + { + "epoch": 0.27396908869364456, + "grad_norm": 53.4254035949707, + "learning_rate": 5e-06, + "loss": 1.1761, + "num_input_tokens_seen": 134930216, + "step": 1068 + }, + { + "epoch": 0.27396908869364456, + "loss": 1.1296992301940918, + "loss_ce": 0.0051875123754143715, + "loss_iou": 0.51171875, + "loss_num": 0.0208740234375, + "loss_xval": 1.125, + "num_input_tokens_seen": 134930216, + "step": 1068 + }, + { + "epoch": 0.27422561405758994, + "grad_norm": 57.68470764160156, + "learning_rate": 5e-06, + "loss": 1.2964, + "num_input_tokens_seen": 135056488, + "step": 1069 + }, + { + "epoch": 0.27422561405758994, + "loss": 1.1420822143554688, + "loss_ce": 0.0009689350845292211, + "loss_iou": 0.5390625, + "loss_num": 0.01263427734375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 135056488, + "step": 1069 + }, + { + "epoch": 0.2744821394215353, + "grad_norm": 58.063533782958984, + "learning_rate": 5e-06, + "loss": 1.3148, + "num_input_tokens_seen": 135182740, + "step": 1070 + }, + { + "epoch": 0.2744821394215353, + "loss": 1.2003554105758667, + "loss_ce": 0.0006484282203018665, + "loss_iou": 0.546875, + "loss_num": 0.021484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 135182740, + "step": 1070 + }, + { + "epoch": 0.27473866478548065, + "grad_norm": 40.253231048583984, + "learning_rate": 5e-06, + "loss": 1.168, + "num_input_tokens_seen": 135309212, + "step": 1071 + }, + { + "epoch": 0.27473866478548065, + "loss": 0.9352540969848633, + "loss_ce": 0.0006838029366917908, + "loss_iou": 0.447265625, + "loss_num": 0.00762939453125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 135309212, + "step": 1071 + }, + { + "epoch": 0.274995190149426, + "grad_norm": 29.102542877197266, + "learning_rate": 5e-06, + "loss": 1.2006, + "num_input_tokens_seen": 135434872, + "step": 1072 + }, + { + "epoch": 0.274995190149426, + "loss": 1.1460816860198975, + "loss_ce": 0.004968361463397741, + "loss_iou": 0.5234375, + "loss_num": 0.01904296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 135434872, + "step": 1072 + }, + { + "epoch": 0.2752517155133714, + "grad_norm": 33.12941360473633, + "learning_rate": 5e-06, + "loss": 1.2288, + "num_input_tokens_seen": 135560144, + "step": 1073 + }, + { + "epoch": 0.2752517155133714, + "loss": 1.0685393810272217, + "loss_ce": 0.0021331259049475193, + "loss_iou": 0.498046875, + "loss_num": 0.0137939453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 135560144, + "step": 1073 + }, + { + "epoch": 0.27550824087731673, + "grad_norm": 58.799983978271484, + "learning_rate": 5e-06, + "loss": 1.211, + "num_input_tokens_seen": 135685508, + "step": 1074 + }, + { + "epoch": 0.27550824087731673, + "loss": 1.2964286804199219, + "loss_ce": 0.0005302400095388293, + "loss_iou": 0.57421875, + "loss_num": 0.0302734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 135685508, + "step": 1074 + }, + { + "epoch": 0.2757647662412621, + "grad_norm": 50.245750427246094, + "learning_rate": 5e-06, + "loss": 1.3273, + "num_input_tokens_seen": 135810768, + "step": 1075 + }, + { + "epoch": 0.2757647662412621, + "loss": 1.228405237197876, + "loss_ce": 0.0003778300597332418, + "loss_iou": 0.5703125, + "loss_num": 0.0179443359375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 135810768, + "step": 1075 + }, + { + "epoch": 0.27602129160520744, + "grad_norm": 59.07404708862305, + "learning_rate": 5e-06, + "loss": 1.0873, + "num_input_tokens_seen": 135936388, + "step": 1076 + }, + { + "epoch": 0.27602129160520744, + "loss": 1.1728357076644897, + "loss_ce": 0.002913873177021742, + "loss_iou": 0.53125, + "loss_num": 0.020751953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 135936388, + "step": 1076 + }, + { + "epoch": 0.2762778169691528, + "grad_norm": 65.16525268554688, + "learning_rate": 5e-06, + "loss": 1.2797, + "num_input_tokens_seen": 136063064, + "step": 1077 + }, + { + "epoch": 0.2762778169691528, + "loss": 1.1645512580871582, + "loss_ce": 0.0014653787948191166, + "loss_iou": 0.546875, + "loss_num": 0.01416015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 136063064, + "step": 1077 + }, + { + "epoch": 0.2765343423330982, + "grad_norm": 80.04695129394531, + "learning_rate": 5e-06, + "loss": 1.3239, + "num_input_tokens_seen": 136189152, + "step": 1078 + }, + { + "epoch": 0.2765343423330982, + "loss": 1.3317536115646362, + "loss_ce": 0.0011872241739183664, + "loss_iou": 0.6171875, + "loss_num": 0.0186767578125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 136189152, + "step": 1078 + }, + { + "epoch": 0.27679086769704353, + "grad_norm": 50.646881103515625, + "learning_rate": 5e-06, + "loss": 1.4159, + "num_input_tokens_seen": 136315196, + "step": 1079 + }, + { + "epoch": 0.27679086769704353, + "loss": 1.313291311264038, + "loss_ce": 0.0017678868025541306, + "loss_iou": 0.6015625, + "loss_num": 0.021240234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 136315196, + "step": 1079 + }, + { + "epoch": 0.2770473930609889, + "grad_norm": 41.17940902709961, + "learning_rate": 5e-06, + "loss": 1.1995, + "num_input_tokens_seen": 136441200, + "step": 1080 + }, + { + "epoch": 0.2770473930609889, + "loss": 1.3864386081695557, + "loss_ce": 0.0016729463823139668, + "loss_iou": 0.62109375, + "loss_num": 0.028076171875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 136441200, + "step": 1080 + }, + { + "epoch": 0.2773039184249343, + "grad_norm": 63.218284606933594, + "learning_rate": 5e-06, + "loss": 1.2256, + "num_input_tokens_seen": 136567820, + "step": 1081 + }, + { + "epoch": 0.2773039184249343, + "loss": 1.1540316343307495, + "loss_ce": 0.0011996570974588394, + "loss_iou": 0.5390625, + "loss_num": 0.01434326171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 136567820, + "step": 1081 + }, + { + "epoch": 0.2775604437888796, + "grad_norm": 44.97651290893555, + "learning_rate": 5e-06, + "loss": 1.3834, + "num_input_tokens_seen": 136694116, + "step": 1082 + }, + { + "epoch": 0.2775604437888796, + "loss": 1.5719619989395142, + "loss_ce": 0.0016495055751875043, + "loss_iou": 0.7109375, + "loss_num": 0.02978515625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 136694116, + "step": 1082 + }, + { + "epoch": 0.277816969152825, + "grad_norm": 38.5687141418457, + "learning_rate": 5e-06, + "loss": 1.257, + "num_input_tokens_seen": 136821120, + "step": 1083 + }, + { + "epoch": 0.277816969152825, + "loss": 1.264225721359253, + "loss_ce": 0.0015304363332688808, + "loss_iou": 0.57421875, + "loss_num": 0.0225830078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 136821120, + "step": 1083 + }, + { + "epoch": 0.2780734945167703, + "grad_norm": 64.35529327392578, + "learning_rate": 5e-06, + "loss": 1.2705, + "num_input_tokens_seen": 136947864, + "step": 1084 + }, + { + "epoch": 0.2780734945167703, + "loss": 1.4071338176727295, + "loss_ce": 0.0013721134746447206, + "loss_iou": 0.640625, + "loss_num": 0.0245361328125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 136947864, + "step": 1084 + }, + { + "epoch": 0.2783300198807157, + "grad_norm": 97.09880065917969, + "learning_rate": 5e-06, + "loss": 1.2925, + "num_input_tokens_seen": 137074204, + "step": 1085 + }, + { + "epoch": 0.2783300198807157, + "loss": 1.3714444637298584, + "loss_ce": 0.001815575873479247, + "loss_iou": 0.640625, + "loss_num": 0.016845703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 137074204, + "step": 1085 + }, + { + "epoch": 0.2785865452446611, + "grad_norm": 55.953330993652344, + "learning_rate": 5e-06, + "loss": 1.5124, + "num_input_tokens_seen": 137201344, + "step": 1086 + }, + { + "epoch": 0.2785865452446611, + "loss": 1.605802297592163, + "loss_ce": 0.007169452495872974, + "loss_iou": 0.7265625, + "loss_num": 0.029296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 137201344, + "step": 1086 + }, + { + "epoch": 0.2788430706086064, + "grad_norm": 36.58639907836914, + "learning_rate": 5e-06, + "loss": 1.1374, + "num_input_tokens_seen": 137327984, + "step": 1087 + }, + { + "epoch": 0.2788430706086064, + "loss": 1.151187777519226, + "loss_ce": 0.000308868387946859, + "loss_iou": 0.53125, + "loss_num": 0.018310546875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 137327984, + "step": 1087 + }, + { + "epoch": 0.2790995959725518, + "grad_norm": 95.76270294189453, + "learning_rate": 5e-06, + "loss": 1.222, + "num_input_tokens_seen": 137454048, + "step": 1088 + }, + { + "epoch": 0.2790995959725518, + "loss": 1.0204031467437744, + "loss_ce": 0.0013602841645479202, + "loss_iou": 0.48046875, + "loss_num": 0.011962890625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 137454048, + "step": 1088 + }, + { + "epoch": 0.2793561213364971, + "grad_norm": 43.06173324584961, + "learning_rate": 5e-06, + "loss": 1.4906, + "num_input_tokens_seen": 137579224, + "step": 1089 + }, + { + "epoch": 0.2793561213364971, + "loss": 1.40779709815979, + "loss_ce": 0.0005705608637072146, + "loss_iou": 0.6484375, + "loss_num": 0.022705078125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 137579224, + "step": 1089 + }, + { + "epoch": 0.2796126467004425, + "grad_norm": 54.673946380615234, + "learning_rate": 5e-06, + "loss": 1.1841, + "num_input_tokens_seen": 137705504, + "step": 1090 + }, + { + "epoch": 0.2796126467004425, + "loss": 1.2519553899765015, + "loss_ce": 0.0009788633324205875, + "loss_iou": 0.5625, + "loss_num": 0.025390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 137705504, + "step": 1090 + }, + { + "epoch": 0.2798691720643879, + "grad_norm": 64.4017333984375, + "learning_rate": 5e-06, + "loss": 1.264, + "num_input_tokens_seen": 137831860, + "step": 1091 + }, + { + "epoch": 0.2798691720643879, + "loss": 1.4295732975006104, + "loss_ce": 0.003792070783674717, + "loss_iou": 0.6328125, + "loss_num": 0.032470703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 137831860, + "step": 1091 + }, + { + "epoch": 0.2801256974283332, + "grad_norm": 58.19264221191406, + "learning_rate": 5e-06, + "loss": 1.2806, + "num_input_tokens_seen": 137958396, + "step": 1092 + }, + { + "epoch": 0.2801256974283332, + "loss": 1.3535892963409424, + "loss_ce": 0.004468244034796953, + "loss_iou": 0.6171875, + "loss_num": 0.0225830078125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 137958396, + "step": 1092 + }, + { + "epoch": 0.2803822227922786, + "grad_norm": 60.08560562133789, + "learning_rate": 5e-06, + "loss": 1.3366, + "num_input_tokens_seen": 138084144, + "step": 1093 + }, + { + "epoch": 0.2803822227922786, + "loss": 1.3057148456573486, + "loss_ce": 0.001515679177828133, + "loss_iou": 0.5859375, + "loss_num": 0.0274658203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 138084144, + "step": 1093 + }, + { + "epoch": 0.28063874815622397, + "grad_norm": 55.45512008666992, + "learning_rate": 5e-06, + "loss": 1.2998, + "num_input_tokens_seen": 138210384, + "step": 1094 + }, + { + "epoch": 0.28063874815622397, + "loss": 1.3557074069976807, + "loss_ce": 0.005121408496052027, + "loss_iou": 0.6171875, + "loss_num": 0.0238037109375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 138210384, + "step": 1094 + }, + { + "epoch": 0.2808952735201693, + "grad_norm": 74.07839965820312, + "learning_rate": 5e-06, + "loss": 1.2429, + "num_input_tokens_seen": 138336428, + "step": 1095 + }, + { + "epoch": 0.2808952735201693, + "loss": 0.9496305584907532, + "loss_ce": 0.001388395787216723, + "loss_iou": 0.447265625, + "loss_num": 0.0107421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 138336428, + "step": 1095 + }, + { + "epoch": 0.2811517988841147, + "grad_norm": 41.5804328918457, + "learning_rate": 5e-06, + "loss": 1.2335, + "num_input_tokens_seen": 138462532, + "step": 1096 + }, + { + "epoch": 0.2811517988841147, + "loss": 1.0138120651245117, + "loss_ce": 0.0016050919657573104, + "loss_iou": 0.482421875, + "loss_num": 0.00970458984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 138462532, + "step": 1096 + }, + { + "epoch": 0.28140832424806, + "grad_norm": 49.26521301269531, + "learning_rate": 5e-06, + "loss": 1.2488, + "num_input_tokens_seen": 138589176, + "step": 1097 + }, + { + "epoch": 0.28140832424806, + "loss": 1.2082653045654297, + "loss_ce": 0.0007457744795829058, + "loss_iou": 0.5625, + "loss_num": 0.0162353515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 138589176, + "step": 1097 + }, + { + "epoch": 0.2816648496120054, + "grad_norm": 52.77206802368164, + "learning_rate": 5e-06, + "loss": 1.2299, + "num_input_tokens_seen": 138715688, + "step": 1098 + }, + { + "epoch": 0.2816648496120054, + "loss": 1.4201366901397705, + "loss_ce": 0.002167979720979929, + "loss_iou": 0.640625, + "loss_num": 0.0283203125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 138715688, + "step": 1098 + }, + { + "epoch": 0.28192137497595077, + "grad_norm": 77.12232971191406, + "learning_rate": 5e-06, + "loss": 1.2996, + "num_input_tokens_seen": 138842000, + "step": 1099 + }, + { + "epoch": 0.28192137497595077, + "loss": 1.2699027061462402, + "loss_ce": 0.00037145998794585466, + "loss_iou": 0.59375, + "loss_num": 0.0155029296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 138842000, + "step": 1099 + }, + { + "epoch": 0.2821779003398961, + "grad_norm": 45.3622932434082, + "learning_rate": 5e-06, + "loss": 1.528, + "num_input_tokens_seen": 138968668, + "step": 1100 + }, + { + "epoch": 0.2821779003398961, + "loss": 1.4429421424865723, + "loss_ce": 0.003488954622298479, + "loss_iou": 0.62890625, + "loss_num": 0.036865234375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 138968668, + "step": 1100 + }, + { + "epoch": 0.2824344257038415, + "grad_norm": 29.01014518737793, + "learning_rate": 5e-06, + "loss": 1.198, + "num_input_tokens_seen": 139095400, + "step": 1101 + }, + { + "epoch": 0.2824344257038415, + "loss": 1.2425529956817627, + "loss_ce": 0.0008538025431334972, + "loss_iou": 0.57421875, + "loss_num": 0.0185546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 139095400, + "step": 1101 + }, + { + "epoch": 0.2826909510677868, + "grad_norm": 85.55681610107422, + "learning_rate": 5e-06, + "loss": 1.2397, + "num_input_tokens_seen": 139221972, + "step": 1102 + }, + { + "epoch": 0.2826909510677868, + "loss": 1.255582332611084, + "loss_ce": 0.0002112557995133102, + "loss_iou": 0.5859375, + "loss_num": 0.0174560546875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 139221972, + "step": 1102 + }, + { + "epoch": 0.2829474764317322, + "grad_norm": 46.68386459350586, + "learning_rate": 5e-06, + "loss": 1.2331, + "num_input_tokens_seen": 139347548, + "step": 1103 + }, + { + "epoch": 0.2829474764317322, + "loss": 1.0226609706878662, + "loss_ce": 0.0011765200179070234, + "loss_iou": 0.484375, + "loss_num": 0.0101318359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 139347548, + "step": 1103 + }, + { + "epoch": 0.28320400179567756, + "grad_norm": 46.64775085449219, + "learning_rate": 5e-06, + "loss": 1.278, + "num_input_tokens_seen": 139474564, + "step": 1104 + }, + { + "epoch": 0.28320400179567756, + "loss": 1.0733956098556519, + "loss_ce": 0.0016182640101760626, + "loss_iou": 0.49609375, + "loss_num": 0.01611328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 139474564, + "step": 1104 + }, + { + "epoch": 0.2834605271596229, + "grad_norm": 77.18077850341797, + "learning_rate": 5e-06, + "loss": 1.1651, + "num_input_tokens_seen": 139600820, + "step": 1105 + }, + { + "epoch": 0.2834605271596229, + "loss": 1.2335973978042603, + "loss_ce": 0.0016637819353491068, + "loss_iou": 0.578125, + "loss_num": 0.0146484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 139600820, + "step": 1105 + }, + { + "epoch": 0.28371705252356827, + "grad_norm": 93.04711151123047, + "learning_rate": 5e-06, + "loss": 1.4364, + "num_input_tokens_seen": 139727796, + "step": 1106 + }, + { + "epoch": 0.28371705252356827, + "loss": 1.4401211738586426, + "loss_ce": 0.0016446657245978713, + "loss_iou": 0.6796875, + "loss_num": 0.01611328125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 139727796, + "step": 1106 + }, + { + "epoch": 0.28397357788751365, + "grad_norm": 50.19260787963867, + "learning_rate": 5e-06, + "loss": 1.435, + "num_input_tokens_seen": 139853136, + "step": 1107 + }, + { + "epoch": 0.28397357788751365, + "loss": 1.6265079975128174, + "loss_ce": 0.0010196957737207413, + "loss_iou": 0.71484375, + "loss_num": 0.03857421875, + "loss_xval": 1.625, + "num_input_tokens_seen": 139853136, + "step": 1107 + }, + { + "epoch": 0.284230103251459, + "grad_norm": 42.54955291748047, + "learning_rate": 5e-06, + "loss": 1.1976, + "num_input_tokens_seen": 139978512, + "step": 1108 + }, + { + "epoch": 0.284230103251459, + "loss": 1.3604955673217773, + "loss_ce": 0.0016088446136564016, + "loss_iou": 0.62109375, + "loss_num": 0.0238037109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 139978512, + "step": 1108 + }, + { + "epoch": 0.28448662861540436, + "grad_norm": 39.10788345336914, + "learning_rate": 5e-06, + "loss": 1.3309, + "num_input_tokens_seen": 140104844, + "step": 1109 + }, + { + "epoch": 0.28448662861540436, + "loss": 1.2154850959777832, + "loss_ce": 0.0021060993894934654, + "loss_iou": 0.5546875, + "loss_num": 0.021240234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 140104844, + "step": 1109 + }, + { + "epoch": 0.2847431539793497, + "grad_norm": 67.5004653930664, + "learning_rate": 5e-06, + "loss": 1.0534, + "num_input_tokens_seen": 140231780, + "step": 1110 + }, + { + "epoch": 0.2847431539793497, + "loss": 1.0767313241958618, + "loss_ce": 0.0010477215982973576, + "loss_iou": 0.5078125, + "loss_num": 0.01251220703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 140231780, + "step": 1110 + }, + { + "epoch": 0.28499967934329506, + "grad_norm": 51.003082275390625, + "learning_rate": 5e-06, + "loss": 1.2674, + "num_input_tokens_seen": 140358788, + "step": 1111 + }, + { + "epoch": 0.28499967934329506, + "loss": 1.2544225454330444, + "loss_ce": 0.0005163264577277005, + "loss_iou": 0.58203125, + "loss_num": 0.0184326171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 140358788, + "step": 1111 + }, + { + "epoch": 0.28525620470724045, + "grad_norm": 37.769744873046875, + "learning_rate": 5e-06, + "loss": 1.2052, + "num_input_tokens_seen": 140484732, + "step": 1112 + }, + { + "epoch": 0.28525620470724045, + "loss": 1.061718225479126, + "loss_ce": 0.0011713991407305002, + "loss_iou": 0.494140625, + "loss_num": 0.014404296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 140484732, + "step": 1112 + }, + { + "epoch": 0.28551273007118577, + "grad_norm": 73.70709991455078, + "learning_rate": 5e-06, + "loss": 1.2858, + "num_input_tokens_seen": 140612348, + "step": 1113 + }, + { + "epoch": 0.28551273007118577, + "loss": 1.2977383136749268, + "loss_ce": 0.0008633724064566195, + "loss_iou": 0.58984375, + "loss_num": 0.02392578125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 140612348, + "step": 1113 + }, + { + "epoch": 0.28576925543513115, + "grad_norm": 51.17510986328125, + "learning_rate": 5e-06, + "loss": 1.2756, + "num_input_tokens_seen": 140740380, + "step": 1114 + }, + { + "epoch": 0.28576925543513115, + "loss": 1.1061838865280151, + "loss_ce": 0.0002268622483825311, + "loss_iou": 0.51953125, + "loss_num": 0.013671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 140740380, + "step": 1114 + }, + { + "epoch": 0.28602578079907653, + "grad_norm": 48.37740707397461, + "learning_rate": 5e-06, + "loss": 1.1408, + "num_input_tokens_seen": 140865964, + "step": 1115 + }, + { + "epoch": 0.28602578079907653, + "loss": 1.0648736953735352, + "loss_ce": 0.0009088230435736477, + "loss_iou": 0.50390625, + "loss_num": 0.0106201171875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 140865964, + "step": 1115 + }, + { + "epoch": 0.28628230616302186, + "grad_norm": 101.02828979492188, + "learning_rate": 5e-06, + "loss": 1.1716, + "num_input_tokens_seen": 140991344, + "step": 1116 + }, + { + "epoch": 0.28628230616302186, + "loss": 1.2777376174926758, + "loss_ce": 0.0023468981962651014, + "loss_iou": 0.59765625, + "loss_num": 0.0166015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 140991344, + "step": 1116 + }, + { + "epoch": 0.28653883152696724, + "grad_norm": 47.552711486816406, + "learning_rate": 5e-06, + "loss": 1.5314, + "num_input_tokens_seen": 141116896, + "step": 1117 + }, + { + "epoch": 0.28653883152696724, + "loss": 1.601276159286499, + "loss_ce": 0.0006901403539814055, + "loss_iou": 0.7265625, + "loss_num": 0.0296630859375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 141116896, + "step": 1117 + }, + { + "epoch": 0.28679535689091257, + "grad_norm": 55.222965240478516, + "learning_rate": 5e-06, + "loss": 1.2246, + "num_input_tokens_seen": 141244176, + "step": 1118 + }, + { + "epoch": 0.28679535689091257, + "loss": 1.2745847702026367, + "loss_ce": 0.002612141892313957, + "loss_iou": 0.578125, + "loss_num": 0.0233154296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 141244176, + "step": 1118 + }, + { + "epoch": 0.28705188225485795, + "grad_norm": 39.03237533569336, + "learning_rate": 5e-06, + "loss": 1.1479, + "num_input_tokens_seen": 141369744, + "step": 1119 + }, + { + "epoch": 0.28705188225485795, + "loss": 1.0725948810577393, + "loss_ce": 0.0008174998802132905, + "loss_iou": 0.50390625, + "loss_num": 0.0128173828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 141369744, + "step": 1119 + }, + { + "epoch": 0.28730840761880333, + "grad_norm": 54.887474060058594, + "learning_rate": 5e-06, + "loss": 1.2659, + "num_input_tokens_seen": 141495936, + "step": 1120 + }, + { + "epoch": 0.28730840761880333, + "loss": 1.343334674835205, + "loss_ce": 0.005932284519076347, + "loss_iou": 0.6171875, + "loss_num": 0.0203857421875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 141495936, + "step": 1120 + }, + { + "epoch": 0.28756493298274866, + "grad_norm": 89.49302673339844, + "learning_rate": 5e-06, + "loss": 1.2563, + "num_input_tokens_seen": 141623788, + "step": 1121 + }, + { + "epoch": 0.28756493298274866, + "loss": 1.1049234867095947, + "loss_ce": 0.0009195586899295449, + "loss_iou": 0.5234375, + "loss_num": 0.01116943359375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 141623788, + "step": 1121 + }, + { + "epoch": 0.28782145834669404, + "grad_norm": 54.02817916870117, + "learning_rate": 5e-06, + "loss": 1.4341, + "num_input_tokens_seen": 141752072, + "step": 1122 + }, + { + "epoch": 0.28782145834669404, + "loss": 1.4214249849319458, + "loss_ce": 0.001014802372083068, + "loss_iou": 0.65234375, + "loss_num": 0.0224609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 141752072, + "step": 1122 + }, + { + "epoch": 0.28807798371063936, + "grad_norm": 45.396484375, + "learning_rate": 5e-06, + "loss": 1.2583, + "num_input_tokens_seen": 141878512, + "step": 1123 + }, + { + "epoch": 0.28807798371063936, + "loss": 1.2874191999435425, + "loss_ce": 0.00030984097975306213, + "loss_iou": 0.5859375, + "loss_num": 0.022216796875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 141878512, + "step": 1123 + }, + { + "epoch": 0.28833450907458474, + "grad_norm": 47.445884704589844, + "learning_rate": 5e-06, + "loss": 1.2575, + "num_input_tokens_seen": 142004376, + "step": 1124 + }, + { + "epoch": 0.28833450907458474, + "loss": 1.1702468395233154, + "loss_ce": 0.0022780555300414562, + "loss_iou": 0.5390625, + "loss_num": 0.0179443359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 142004376, + "step": 1124 + }, + { + "epoch": 0.2885910344385301, + "grad_norm": 44.48907470703125, + "learning_rate": 5e-06, + "loss": 1.1507, + "num_input_tokens_seen": 142130520, + "step": 1125 + }, + { + "epoch": 0.2885910344385301, + "loss": 1.008172869682312, + "loss_ce": 0.0018252030713483691, + "loss_iou": 0.4609375, + "loss_num": 0.0174560546875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 142130520, + "step": 1125 + }, + { + "epoch": 0.28884755980247545, + "grad_norm": 47.569053649902344, + "learning_rate": 5e-06, + "loss": 1.19, + "num_input_tokens_seen": 142257324, + "step": 1126 + }, + { + "epoch": 0.28884755980247545, + "loss": 1.031123399734497, + "loss_ce": 0.0025589456781744957, + "loss_iou": 0.484375, + "loss_num": 0.011474609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 142257324, + "step": 1126 + }, + { + "epoch": 0.28910408516642083, + "grad_norm": 55.56782913208008, + "learning_rate": 5e-06, + "loss": 1.1237, + "num_input_tokens_seen": 142383724, + "step": 1127 + }, + { + "epoch": 0.28910408516642083, + "loss": 1.1582621335983276, + "loss_ce": 0.000547341420315206, + "loss_iou": 0.55078125, + "loss_num": 0.01177978515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 142383724, + "step": 1127 + }, + { + "epoch": 0.2893606105303662, + "grad_norm": 64.89530181884766, + "learning_rate": 5e-06, + "loss": 1.1961, + "num_input_tokens_seen": 142508452, + "step": 1128 + }, + { + "epoch": 0.2893606105303662, + "loss": 1.0366743803024292, + "loss_ce": 0.0005415817722678185, + "loss_iou": 0.4921875, + "loss_num": 0.01043701171875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 142508452, + "step": 1128 + }, + { + "epoch": 0.28961713589431154, + "grad_norm": 57.49004364013672, + "learning_rate": 5e-06, + "loss": 1.2986, + "num_input_tokens_seen": 142635928, + "step": 1129 + }, + { + "epoch": 0.28961713589431154, + "loss": 1.4033167362213135, + "loss_ce": 0.0048792739398777485, + "loss_iou": 0.6328125, + "loss_num": 0.02685546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 142635928, + "step": 1129 + }, + { + "epoch": 0.2898736612582569, + "grad_norm": 58.971309661865234, + "learning_rate": 5e-06, + "loss": 1.3078, + "num_input_tokens_seen": 142762040, + "step": 1130 + }, + { + "epoch": 0.2898736612582569, + "loss": 1.4680778980255127, + "loss_ce": 0.004210685845464468, + "loss_iou": 0.640625, + "loss_num": 0.03564453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 142762040, + "step": 1130 + }, + { + "epoch": 0.29013018662220225, + "grad_norm": 47.310089111328125, + "learning_rate": 5e-06, + "loss": 1.3731, + "num_input_tokens_seen": 142887628, + "step": 1131 + }, + { + "epoch": 0.29013018662220225, + "loss": 1.6404972076416016, + "loss_ce": 0.003778393380343914, + "loss_iou": 0.7265625, + "loss_num": 0.0361328125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 142887628, + "step": 1131 + }, + { + "epoch": 0.2903867119861476, + "grad_norm": 57.33110427856445, + "learning_rate": 5e-06, + "loss": 1.1617, + "num_input_tokens_seen": 143013672, + "step": 1132 + }, + { + "epoch": 0.2903867119861476, + "loss": 1.0562571287155151, + "loss_ce": 0.0005930407205596566, + "loss_iou": 0.494140625, + "loss_num": 0.01336669921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 143013672, + "step": 1132 + }, + { + "epoch": 0.290643237350093, + "grad_norm": 47.94330596923828, + "learning_rate": 5e-06, + "loss": 1.2711, + "num_input_tokens_seen": 143139444, + "step": 1133 + }, + { + "epoch": 0.290643237350093, + "loss": 1.3210477828979492, + "loss_ce": 0.001711850636638701, + "loss_iou": 0.59765625, + "loss_num": 0.0245361328125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 143139444, + "step": 1133 + }, + { + "epoch": 0.29089976271403833, + "grad_norm": 99.46900939941406, + "learning_rate": 5e-06, + "loss": 1.2399, + "num_input_tokens_seen": 143266120, + "step": 1134 + }, + { + "epoch": 0.29089976271403833, + "loss": 1.2280025482177734, + "loss_ce": 0.0004634447686839849, + "loss_iou": 0.57421875, + "loss_num": 0.01556396484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 143266120, + "step": 1134 + }, + { + "epoch": 0.2911562880779837, + "grad_norm": 49.935401916503906, + "learning_rate": 5e-06, + "loss": 1.4529, + "num_input_tokens_seen": 143392508, + "step": 1135 + }, + { + "epoch": 0.2911562880779837, + "loss": 1.466294288635254, + "loss_ce": 0.00047407514648512006, + "loss_iou": 0.671875, + "loss_num": 0.0240478515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 143392508, + "step": 1135 + }, + { + "epoch": 0.2914128134419291, + "grad_norm": 42.141883850097656, + "learning_rate": 5e-06, + "loss": 1.2657, + "num_input_tokens_seen": 143518636, + "step": 1136 + }, + { + "epoch": 0.2914128134419291, + "loss": 0.9636263251304626, + "loss_ce": 0.0017122298013418913, + "loss_iou": 0.45703125, + "loss_num": 0.00970458984375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 143518636, + "step": 1136 + }, + { + "epoch": 0.2916693388058744, + "grad_norm": 30.46164321899414, + "learning_rate": 5e-06, + "loss": 1.2299, + "num_input_tokens_seen": 143645108, + "step": 1137 + }, + { + "epoch": 0.2916693388058744, + "loss": 1.2027699947357178, + "loss_ce": 0.0011098445393145084, + "loss_iou": 0.54296875, + "loss_num": 0.0235595703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 143645108, + "step": 1137 + }, + { + "epoch": 0.2919258641698198, + "grad_norm": 45.9459342956543, + "learning_rate": 5e-06, + "loss": 1.2756, + "num_input_tokens_seen": 143771732, + "step": 1138 + }, + { + "epoch": 0.2919258641698198, + "loss": 1.3004601001739502, + "loss_ce": 0.0006554399151355028, + "loss_iou": 0.60546875, + "loss_num": 0.018310546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 143771732, + "step": 1138 + }, + { + "epoch": 0.29218238953376513, + "grad_norm": 96.11602020263672, + "learning_rate": 5e-06, + "loss": 1.222, + "num_input_tokens_seen": 143899160, + "step": 1139 + }, + { + "epoch": 0.29218238953376513, + "loss": 1.0566319227218628, + "loss_ce": 0.0014560867566615343, + "loss_iou": 0.494140625, + "loss_num": 0.013671875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 143899160, + "step": 1139 + }, + { + "epoch": 0.2924389148977105, + "grad_norm": 49.9255256652832, + "learning_rate": 5e-06, + "loss": 1.447, + "num_input_tokens_seen": 144025520, + "step": 1140 + }, + { + "epoch": 0.2924389148977105, + "loss": 1.3974721431732178, + "loss_ce": 0.000987836392596364, + "loss_iou": 0.6484375, + "loss_num": 0.01904296875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 144025520, + "step": 1140 + }, + { + "epoch": 0.2926954402616559, + "grad_norm": 46.07719039916992, + "learning_rate": 5e-06, + "loss": 1.2442, + "num_input_tokens_seen": 144152616, + "step": 1141 + }, + { + "epoch": 0.2926954402616559, + "loss": 1.0212448835372925, + "loss_ce": 0.0007370659150183201, + "loss_iou": 0.474609375, + "loss_num": 0.01397705078125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 144152616, + "step": 1141 + }, + { + "epoch": 0.2929519656256012, + "grad_norm": 58.803794860839844, + "learning_rate": 5e-06, + "loss": 1.2346, + "num_input_tokens_seen": 144278420, + "step": 1142 + }, + { + "epoch": 0.2929519656256012, + "loss": 1.4388103485107422, + "loss_ce": 0.0013104206882417202, + "loss_iou": 0.66015625, + "loss_num": 0.0235595703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 144278420, + "step": 1142 + }, + { + "epoch": 0.2932084909895466, + "grad_norm": 61.16276168823242, + "learning_rate": 5e-06, + "loss": 1.2525, + "num_input_tokens_seen": 144404408, + "step": 1143 + }, + { + "epoch": 0.2932084909895466, + "loss": 1.1892489194869995, + "loss_ce": 0.003213821444660425, + "loss_iou": 0.54296875, + "loss_num": 0.0191650390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 144404408, + "step": 1143 + }, + { + "epoch": 0.2934650163534919, + "grad_norm": 91.4574203491211, + "learning_rate": 5e-06, + "loss": 1.3403, + "num_input_tokens_seen": 144531176, + "step": 1144 + }, + { + "epoch": 0.2934650163534919, + "loss": 1.2727622985839844, + "loss_ce": 0.0012779267271980643, + "loss_iou": 0.5859375, + "loss_num": 0.0191650390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 144531176, + "step": 1144 + }, + { + "epoch": 0.2937215417174373, + "grad_norm": 45.77968215942383, + "learning_rate": 5e-06, + "loss": 1.44, + "num_input_tokens_seen": 144657448, + "step": 1145 + }, + { + "epoch": 0.2937215417174373, + "loss": 1.5217342376708984, + "loss_ce": 0.0017147797625511885, + "loss_iou": 0.671875, + "loss_num": 0.03466796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 144657448, + "step": 1145 + }, + { + "epoch": 0.2939780670813827, + "grad_norm": 46.609737396240234, + "learning_rate": 5e-06, + "loss": 1.0955, + "num_input_tokens_seen": 144783504, + "step": 1146 + }, + { + "epoch": 0.2939780670813827, + "loss": 1.194151759147644, + "loss_ce": 0.0027455128729343414, + "loss_iou": 0.55078125, + "loss_num": 0.0185546875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 144783504, + "step": 1146 + }, + { + "epoch": 0.294234592445328, + "grad_norm": 56.23094177246094, + "learning_rate": 5e-06, + "loss": 1.2731, + "num_input_tokens_seen": 144908596, + "step": 1147 + }, + { + "epoch": 0.294234592445328, + "loss": 1.1289995908737183, + "loss_ce": 0.0010698674013838172, + "loss_iou": 0.51953125, + "loss_num": 0.0179443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 144908596, + "step": 1147 + }, + { + "epoch": 0.2944911178092734, + "grad_norm": 54.55766296386719, + "learning_rate": 5e-06, + "loss": 1.2227, + "num_input_tokens_seen": 145034208, + "step": 1148 + }, + { + "epoch": 0.2944911178092734, + "loss": 1.378260612487793, + "loss_ce": 0.0032607223838567734, + "loss_iou": 0.63671875, + "loss_num": 0.020751953125, + "loss_xval": 1.375, + "num_input_tokens_seen": 145034208, + "step": 1148 + }, + { + "epoch": 0.2947476431732188, + "grad_norm": 57.792205810546875, + "learning_rate": 5e-06, + "loss": 1.1573, + "num_input_tokens_seen": 145160152, + "step": 1149 + }, + { + "epoch": 0.2947476431732188, + "loss": 1.06233811378479, + "loss_ce": 0.00032641630969010293, + "loss_iou": 0.498046875, + "loss_num": 0.01324462890625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 145160152, + "step": 1149 + }, + { + "epoch": 0.2950041685371641, + "grad_norm": 59.97188949584961, + "learning_rate": 5e-06, + "loss": 1.2556, + "num_input_tokens_seen": 145286140, + "step": 1150 + }, + { + "epoch": 0.2950041685371641, + "loss": 1.271167278289795, + "loss_ce": 0.0006594822043552995, + "loss_iou": 0.5859375, + "loss_num": 0.0189208984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 145286140, + "step": 1150 + }, + { + "epoch": 0.2952606939011095, + "grad_norm": 67.49483489990234, + "learning_rate": 5e-06, + "loss": 1.1341, + "num_input_tokens_seen": 145412540, + "step": 1151 + }, + { + "epoch": 0.2952606939011095, + "loss": 1.2529656887054443, + "loss_ce": 0.001989130862057209, + "loss_iou": 0.58203125, + "loss_num": 0.017333984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 145412540, + "step": 1151 + }, + { + "epoch": 0.2955172192650548, + "grad_norm": 55.4300422668457, + "learning_rate": 5e-06, + "loss": 1.1382, + "num_input_tokens_seen": 145538812, + "step": 1152 + }, + { + "epoch": 0.2955172192650548, + "loss": 1.0652270317077637, + "loss_ce": 0.0012621500063687563, + "loss_iou": 0.5078125, + "loss_num": 0.009765625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 145538812, + "step": 1152 + }, + { + "epoch": 0.2957737446290002, + "grad_norm": 41.76433563232422, + "learning_rate": 5e-06, + "loss": 1.1255, + "num_input_tokens_seen": 145665132, + "step": 1153 + }, + { + "epoch": 0.2957737446290002, + "loss": 0.9830601811408997, + "loss_ce": 0.00161486747674644, + "loss_iou": 0.46875, + "loss_num": 0.0087890625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 145665132, + "step": 1153 + }, + { + "epoch": 0.29603026999294557, + "grad_norm": 57.860252380371094, + "learning_rate": 5e-06, + "loss": 1.2561, + "num_input_tokens_seen": 145792128, + "step": 1154 + }, + { + "epoch": 0.29603026999294557, + "loss": 1.2943000793457031, + "loss_ce": 0.0003547464148141444, + "loss_iou": 0.60546875, + "loss_num": 0.0166015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 145792128, + "step": 1154 + }, + { + "epoch": 0.2962867953568909, + "grad_norm": 87.00450134277344, + "learning_rate": 5e-06, + "loss": 1.2804, + "num_input_tokens_seen": 145919608, + "step": 1155 + }, + { + "epoch": 0.2962867953568909, + "loss": 1.121140956878662, + "loss_ce": 0.00029146071756258607, + "loss_iou": 0.52734375, + "loss_num": 0.01385498046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 145919608, + "step": 1155 + }, + { + "epoch": 0.2965433207208363, + "grad_norm": 50.82177734375, + "learning_rate": 5e-06, + "loss": 1.2279, + "num_input_tokens_seen": 146045008, + "step": 1156 + }, + { + "epoch": 0.2965433207208363, + "loss": 1.368868112564087, + "loss_ce": 0.0007040193304419518, + "loss_iou": 0.63671875, + "loss_num": 0.018798828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 146045008, + "step": 1156 + }, + { + "epoch": 0.29679984608478166, + "grad_norm": 37.024051666259766, + "learning_rate": 5e-06, + "loss": 1.3446, + "num_input_tokens_seen": 146171092, + "step": 1157 + }, + { + "epoch": 0.29679984608478166, + "loss": 1.300889015197754, + "loss_ce": 0.0005959449335932732, + "loss_iou": 0.609375, + "loss_num": 0.0169677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 146171092, + "step": 1157 + }, + { + "epoch": 0.297056371448727, + "grad_norm": 45.63780212402344, + "learning_rate": 5e-06, + "loss": 1.2097, + "num_input_tokens_seen": 146298408, + "step": 1158 + }, + { + "epoch": 0.297056371448727, + "loss": 1.1684479713439941, + "loss_ce": 0.0029206478502601385, + "loss_iou": 0.53125, + "loss_num": 0.02099609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 146298408, + "step": 1158 + }, + { + "epoch": 0.29731289681267237, + "grad_norm": 83.1688232421875, + "learning_rate": 5e-06, + "loss": 1.2941, + "num_input_tokens_seen": 146425060, + "step": 1159 + }, + { + "epoch": 0.29731289681267237, + "loss": 1.3153468370437622, + "loss_ce": 0.0018702792003750801, + "loss_iou": 0.60546875, + "loss_num": 0.02001953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 146425060, + "step": 1159 + }, + { + "epoch": 0.2975694221766177, + "grad_norm": 44.54999923706055, + "learning_rate": 5e-06, + "loss": 1.363, + "num_input_tokens_seen": 146551192, + "step": 1160 + }, + { + "epoch": 0.2975694221766177, + "loss": 1.4388405084609985, + "loss_ce": 0.0032936418429017067, + "loss_iou": 0.64453125, + "loss_num": 0.0289306640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 146551192, + "step": 1160 + }, + { + "epoch": 0.2978259475405631, + "grad_norm": 38.99470520019531, + "learning_rate": 5e-06, + "loss": 1.1429, + "num_input_tokens_seen": 146677340, + "step": 1161 + }, + { + "epoch": 0.2978259475405631, + "loss": 0.9995183944702148, + "loss_ce": 0.0029363343492150307, + "loss_iou": 0.46484375, + "loss_num": 0.01348876953125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 146677340, + "step": 1161 + }, + { + "epoch": 0.29808247290450846, + "grad_norm": 44.64743423461914, + "learning_rate": 5e-06, + "loss": 1.2005, + "num_input_tokens_seen": 146803632, + "step": 1162 + }, + { + "epoch": 0.29808247290450846, + "loss": 0.9928357601165771, + "loss_ce": 0.001136547653004527, + "loss_iou": 0.4609375, + "loss_num": 0.01409912109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 146803632, + "step": 1162 + }, + { + "epoch": 0.2983389982684538, + "grad_norm": 70.78120422363281, + "learning_rate": 5e-06, + "loss": 1.1713, + "num_input_tokens_seen": 146929016, + "step": 1163 + }, + { + "epoch": 0.2983389982684538, + "loss": 1.1006577014923096, + "loss_ce": 0.002024984685704112, + "loss_iou": 0.5, + "loss_num": 0.02001953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 146929016, + "step": 1163 + }, + { + "epoch": 0.29859552363239916, + "grad_norm": 51.52165603637695, + "learning_rate": 5e-06, + "loss": 1.3631, + "num_input_tokens_seen": 147055032, + "step": 1164 + }, + { + "epoch": 0.29859552363239916, + "loss": 1.4689750671386719, + "loss_ce": 0.003154870355501771, + "loss_iou": 0.6640625, + "loss_num": 0.0269775390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 147055032, + "step": 1164 + }, + { + "epoch": 0.2988520489963445, + "grad_norm": 56.306758880615234, + "learning_rate": 5e-06, + "loss": 1.1852, + "num_input_tokens_seen": 147181796, + "step": 1165 + }, + { + "epoch": 0.2988520489963445, + "loss": 1.1660836935043335, + "loss_ce": 0.0005563480081036687, + "loss_iou": 0.5546875, + "loss_num": 0.01153564453125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 147181796, + "step": 1165 + }, + { + "epoch": 0.29910857436028987, + "grad_norm": 46.257015228271484, + "learning_rate": 5e-06, + "loss": 1.4458, + "num_input_tokens_seen": 147307832, + "step": 1166 + }, + { + "epoch": 0.29910857436028987, + "loss": 1.641001582145691, + "loss_ce": 0.0013531562872231007, + "loss_iou": 0.7421875, + "loss_num": 0.031494140625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 147307832, + "step": 1166 + }, + { + "epoch": 0.29936509972423525, + "grad_norm": 29.73887062072754, + "learning_rate": 5e-06, + "loss": 1.0751, + "num_input_tokens_seen": 147432760, + "step": 1167 + }, + { + "epoch": 0.29936509972423525, + "loss": 1.147862195968628, + "loss_ce": 0.0023543578572571278, + "loss_iou": 0.53125, + "loss_num": 0.0164794921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 147432760, + "step": 1167 + }, + { + "epoch": 0.2996216250881806, + "grad_norm": 45.34124755859375, + "learning_rate": 5e-06, + "loss": 1.2131, + "num_input_tokens_seen": 147559444, + "step": 1168 + }, + { + "epoch": 0.2996216250881806, + "loss": 1.0978922843933105, + "loss_ce": 0.00023604347370564938, + "loss_iou": 0.515625, + "loss_num": 0.01251220703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 147559444, + "step": 1168 + }, + { + "epoch": 0.29987815045212596, + "grad_norm": 92.46239471435547, + "learning_rate": 5e-06, + "loss": 1.3267, + "num_input_tokens_seen": 147686848, + "step": 1169 + }, + { + "epoch": 0.29987815045212596, + "loss": 1.3516497611999512, + "loss_ce": 0.0015521723544225097, + "loss_iou": 0.62109375, + "loss_num": 0.02197265625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 147686848, + "step": 1169 + }, + { + "epoch": 0.30013467581607134, + "grad_norm": 43.90057373046875, + "learning_rate": 5e-06, + "loss": 1.4049, + "num_input_tokens_seen": 147812184, + "step": 1170 + }, + { + "epoch": 0.30013467581607134, + "loss": 1.411547064781189, + "loss_ce": 0.0004142364487051964, + "loss_iou": 0.6328125, + "loss_num": 0.02880859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 147812184, + "step": 1170 + }, + { + "epoch": 0.30039120118001666, + "grad_norm": 53.345947265625, + "learning_rate": 5e-06, + "loss": 1.237, + "num_input_tokens_seen": 147939684, + "step": 1171 + }, + { + "epoch": 0.30039120118001666, + "loss": 1.3145030736923218, + "loss_ce": 0.0029796408489346504, + "loss_iou": 0.60546875, + "loss_num": 0.020751953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 147939684, + "step": 1171 + }, + { + "epoch": 0.30064772654396205, + "grad_norm": 90.21387481689453, + "learning_rate": 5e-06, + "loss": 1.2353, + "num_input_tokens_seen": 148065432, + "step": 1172 + }, + { + "epoch": 0.30064772654396205, + "loss": 1.4320333003997803, + "loss_ce": 0.004787145648151636, + "loss_iou": 0.65625, + "loss_num": 0.0235595703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 148065432, + "step": 1172 + }, + { + "epoch": 0.30090425190790737, + "grad_norm": 41.570884704589844, + "learning_rate": 5e-06, + "loss": 1.3651, + "num_input_tokens_seen": 148191220, + "step": 1173 + }, + { + "epoch": 0.30090425190790737, + "loss": 1.3338682651519775, + "loss_ce": 0.0013487989781424403, + "loss_iou": 0.62109375, + "loss_num": 0.017578125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 148191220, + "step": 1173 + }, + { + "epoch": 0.30116077727185275, + "grad_norm": 35.931846618652344, + "learning_rate": 5e-06, + "loss": 1.2534, + "num_input_tokens_seen": 148316388, + "step": 1174 + }, + { + "epoch": 0.30116077727185275, + "loss": 1.1850210428237915, + "loss_ce": 0.0077750482596457005, + "loss_iou": 0.54296875, + "loss_num": 0.017822265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 148316388, + "step": 1174 + }, + { + "epoch": 0.30141730263579813, + "grad_norm": 46.10340881347656, + "learning_rate": 5e-06, + "loss": 1.2273, + "num_input_tokens_seen": 148442800, + "step": 1175 + }, + { + "epoch": 0.30141730263579813, + "loss": 1.0167725086212158, + "loss_ce": 0.0006591601995751262, + "loss_iou": 0.47265625, + "loss_num": 0.01373291015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 148442800, + "step": 1175 + }, + { + "epoch": 0.30167382799974346, + "grad_norm": 48.4448356628418, + "learning_rate": 5e-06, + "loss": 1.2832, + "num_input_tokens_seen": 148570168, + "step": 1176 + }, + { + "epoch": 0.30167382799974346, + "loss": 1.2180843353271484, + "loss_ce": 0.0007991014863364398, + "loss_iou": 0.57421875, + "loss_num": 0.01434326171875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 148570168, + "step": 1176 + }, + { + "epoch": 0.30193035336368884, + "grad_norm": 40.62450408935547, + "learning_rate": 5e-06, + "loss": 1.2471, + "num_input_tokens_seen": 148695392, + "step": 1177 + }, + { + "epoch": 0.30193035336368884, + "loss": 1.3547073602676392, + "loss_ce": 0.001680118264630437, + "loss_iou": 0.59765625, + "loss_num": 0.031494140625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 148695392, + "step": 1177 + }, + { + "epoch": 0.30218687872763417, + "grad_norm": 60.83898162841797, + "learning_rate": 5e-06, + "loss": 1.0865, + "num_input_tokens_seen": 148821980, + "step": 1178 + }, + { + "epoch": 0.30218687872763417, + "loss": 1.066332221031189, + "loss_ce": 0.00041423720540478826, + "loss_iou": 0.49609375, + "loss_num": 0.0152587890625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 148821980, + "step": 1178 + }, + { + "epoch": 0.30244340409157955, + "grad_norm": 89.45770263671875, + "learning_rate": 5e-06, + "loss": 1.3372, + "num_input_tokens_seen": 148948128, + "step": 1179 + }, + { + "epoch": 0.30244340409157955, + "loss": 1.246108889579773, + "loss_ce": 0.005386186297982931, + "loss_iou": 0.55078125, + "loss_num": 0.0269775390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 148948128, + "step": 1179 + }, + { + "epoch": 0.30269992945552493, + "grad_norm": 52.05886459350586, + "learning_rate": 5e-06, + "loss": 1.3733, + "num_input_tokens_seen": 149074444, + "step": 1180 + }, + { + "epoch": 0.30269992945552493, + "loss": 1.2411415576934814, + "loss_ce": 0.0009071852546185255, + "loss_iou": 0.5859375, + "loss_num": 0.01318359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 149074444, + "step": 1180 + }, + { + "epoch": 0.30295645481947026, + "grad_norm": 30.303001403808594, + "learning_rate": 5e-06, + "loss": 1.108, + "num_input_tokens_seen": 149199584, + "step": 1181 + }, + { + "epoch": 0.30295645481947026, + "loss": 1.2709537744522095, + "loss_ce": 0.0004459265910554677, + "loss_iou": 0.56640625, + "loss_num": 0.0267333984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 149199584, + "step": 1181 + }, + { + "epoch": 0.30321298018341564, + "grad_norm": 46.278995513916016, + "learning_rate": 5e-06, + "loss": 1.0841, + "num_input_tokens_seen": 149326436, + "step": 1182 + }, + { + "epoch": 0.30321298018341564, + "loss": 1.2528784275054932, + "loss_ce": 0.0004370058886706829, + "loss_iou": 0.5703125, + "loss_num": 0.0216064453125, + "loss_xval": 1.25, + "num_input_tokens_seen": 149326436, + "step": 1182 + }, + { + "epoch": 0.303469505547361, + "grad_norm": 49.83050537109375, + "learning_rate": 5e-06, + "loss": 1.23, + "num_input_tokens_seen": 149451836, + "step": 1183 + }, + { + "epoch": 0.303469505547361, + "loss": 1.4447295665740967, + "loss_ce": 0.005276510491967201, + "loss_iou": 0.65234375, + "loss_num": 0.026611328125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 149451836, + "step": 1183 + }, + { + "epoch": 0.30372603091130634, + "grad_norm": 54.654136657714844, + "learning_rate": 5e-06, + "loss": 1.2915, + "num_input_tokens_seen": 149578948, + "step": 1184 + }, + { + "epoch": 0.30372603091130634, + "loss": 1.4606566429138184, + "loss_ce": 0.004601949825882912, + "loss_iou": 0.65625, + "loss_num": 0.02783203125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 149578948, + "step": 1184 + }, + { + "epoch": 0.3039825562752517, + "grad_norm": 51.099666595458984, + "learning_rate": 5e-06, + "loss": 1.2468, + "num_input_tokens_seen": 149704788, + "step": 1185 + }, + { + "epoch": 0.3039825562752517, + "loss": 1.1831636428833008, + "loss_ce": 0.0034760974813252687, + "loss_iou": 0.55859375, + "loss_num": 0.01251220703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 149704788, + "step": 1185 + }, + { + "epoch": 0.30423908163919705, + "grad_norm": 53.0371208190918, + "learning_rate": 5e-06, + "loss": 1.2256, + "num_input_tokens_seen": 149831912, + "step": 1186 + }, + { + "epoch": 0.30423908163919705, + "loss": 1.1522853374481201, + "loss_ce": 0.0023830407299101353, + "loss_iou": 0.51953125, + "loss_num": 0.02294921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 149831912, + "step": 1186 + }, + { + "epoch": 0.30449560700314243, + "grad_norm": 55.173126220703125, + "learning_rate": 5e-06, + "loss": 1.2029, + "num_input_tokens_seen": 149958944, + "step": 1187 + }, + { + "epoch": 0.30449560700314243, + "loss": 1.229234218597412, + "loss_ce": 0.0007186041912063956, + "loss_iou": 0.57421875, + "loss_num": 0.0164794921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 149958944, + "step": 1187 + }, + { + "epoch": 0.3047521323670878, + "grad_norm": 68.9417953491211, + "learning_rate": 5e-06, + "loss": 1.2019, + "num_input_tokens_seen": 150085704, + "step": 1188 + }, + { + "epoch": 0.3047521323670878, + "loss": 1.296430230140686, + "loss_ce": 0.00394979864358902, + "loss_iou": 0.6015625, + "loss_num": 0.0179443359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 150085704, + "step": 1188 + }, + { + "epoch": 0.30500865773103314, + "grad_norm": 55.345252990722656, + "learning_rate": 5e-06, + "loss": 1.3393, + "num_input_tokens_seen": 150212280, + "step": 1189 + }, + { + "epoch": 0.30500865773103314, + "loss": 1.2074825763702393, + "loss_ce": 0.0009396728128194809, + "loss_iou": 0.57421875, + "loss_num": 0.01165771484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 150212280, + "step": 1189 + }, + { + "epoch": 0.3052651830949785, + "grad_norm": 39.968711853027344, + "learning_rate": 5e-06, + "loss": 1.1119, + "num_input_tokens_seen": 150338548, + "step": 1190 + }, + { + "epoch": 0.3052651830949785, + "loss": 1.3174371719360352, + "loss_ce": 0.002495687920600176, + "loss_iou": 0.58203125, + "loss_num": 0.02978515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 150338548, + "step": 1190 + }, + { + "epoch": 0.3055217084589239, + "grad_norm": 44.61415100097656, + "learning_rate": 5e-06, + "loss": 1.2381, + "num_input_tokens_seen": 150463908, + "step": 1191 + }, + { + "epoch": 0.3055217084589239, + "loss": 1.4491472244262695, + "loss_ce": 0.0028581940568983555, + "loss_iou": 0.6484375, + "loss_num": 0.0294189453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 150463908, + "step": 1191 + }, + { + "epoch": 0.3057782338228692, + "grad_norm": 78.20832824707031, + "learning_rate": 5e-06, + "loss": 1.2486, + "num_input_tokens_seen": 150590400, + "step": 1192 + }, + { + "epoch": 0.3057782338228692, + "loss": 1.3033801317214966, + "loss_ce": 0.001134048099629581, + "loss_iou": 0.59765625, + "loss_num": 0.0208740234375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 150590400, + "step": 1192 + }, + { + "epoch": 0.3060347591868146, + "grad_norm": 50.78615188598633, + "learning_rate": 5e-06, + "loss": 1.3751, + "num_input_tokens_seen": 150716496, + "step": 1193 + }, + { + "epoch": 0.3060347591868146, + "loss": 1.4013301134109497, + "loss_ce": 0.0033808862790465355, + "loss_iou": 0.6328125, + "loss_num": 0.02587890625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 150716496, + "step": 1193 + }, + { + "epoch": 0.30629128455075993, + "grad_norm": 39.1738395690918, + "learning_rate": 5e-06, + "loss": 1.1664, + "num_input_tokens_seen": 150842332, + "step": 1194 + }, + { + "epoch": 0.30629128455075993, + "loss": 1.2083321809768677, + "loss_ce": 0.0027658059261739254, + "loss_iou": 0.53515625, + "loss_num": 0.0269775390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 150842332, + "step": 1194 + }, + { + "epoch": 0.3065478099147053, + "grad_norm": 47.1965217590332, + "learning_rate": 5e-06, + "loss": 1.0615, + "num_input_tokens_seen": 150968036, + "step": 1195 + }, + { + "epoch": 0.3065478099147053, + "loss": 1.083253026008606, + "loss_ce": 0.004151465371251106, + "loss_iou": 0.478515625, + "loss_num": 0.0244140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 150968036, + "step": 1195 + }, + { + "epoch": 0.3068043352786507, + "grad_norm": 51.76456069946289, + "learning_rate": 5e-06, + "loss": 1.1056, + "num_input_tokens_seen": 151094340, + "step": 1196 + }, + { + "epoch": 0.3068043352786507, + "loss": 1.2444560527801514, + "loss_ce": 0.0017801887588575482, + "loss_iou": 0.5859375, + "loss_num": 0.01495361328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 151094340, + "step": 1196 + }, + { + "epoch": 0.307060860642596, + "grad_norm": 66.48481750488281, + "learning_rate": 5e-06, + "loss": 1.2329, + "num_input_tokens_seen": 151220712, + "step": 1197 + }, + { + "epoch": 0.307060860642596, + "loss": 1.2583158016204834, + "loss_ce": 0.001968119293451309, + "loss_iou": 0.58203125, + "loss_num": 0.0184326171875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 151220712, + "step": 1197 + }, + { + "epoch": 0.3073173860065414, + "grad_norm": 49.40713882446289, + "learning_rate": 5e-06, + "loss": 1.2568, + "num_input_tokens_seen": 151347000, + "step": 1198 + }, + { + "epoch": 0.3073173860065414, + "loss": 1.142624855041504, + "loss_ce": 0.00102332909591496, + "loss_iou": 0.54296875, + "loss_num": 0.01202392578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 151347000, + "step": 1198 + }, + { + "epoch": 0.30757391137048673, + "grad_norm": 35.816810607910156, + "learning_rate": 5e-06, + "loss": 1.2191, + "num_input_tokens_seen": 151474024, + "step": 1199 + }, + { + "epoch": 0.30757391137048673, + "loss": 1.19255530834198, + "loss_ce": 0.0021256303880363703, + "loss_iou": 0.53125, + "loss_num": 0.0263671875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 151474024, + "step": 1199 + }, + { + "epoch": 0.3078304367344321, + "grad_norm": 51.681129455566406, + "learning_rate": 5e-06, + "loss": 1.2157, + "num_input_tokens_seen": 151599220, + "step": 1200 + }, + { + "epoch": 0.3078304367344321, + "loss": 1.1276705265045166, + "loss_ce": 0.0016939828637987375, + "loss_iou": 0.5078125, + "loss_num": 0.0213623046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 151599220, + "step": 1200 + }, + { + "epoch": 0.3080869620983775, + "grad_norm": 92.8583984375, + "learning_rate": 5e-06, + "loss": 1.2713, + "num_input_tokens_seen": 151726424, + "step": 1201 + }, + { + "epoch": 0.3080869620983775, + "loss": 1.1594326496124268, + "loss_ce": 0.00171779899392277, + "loss_iou": 0.54296875, + "loss_num": 0.01409912109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 151726424, + "step": 1201 + }, + { + "epoch": 0.3083434874623228, + "grad_norm": 46.209228515625, + "learning_rate": 5e-06, + "loss": 1.3013, + "num_input_tokens_seen": 151852080, + "step": 1202 + }, + { + "epoch": 0.3083434874623228, + "loss": 1.4813125133514404, + "loss_ce": 0.0027968569193035364, + "loss_iou": 0.66015625, + "loss_num": 0.031494140625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 151852080, + "step": 1202 + }, + { + "epoch": 0.3086000128262682, + "grad_norm": 50.171730041503906, + "learning_rate": 5e-06, + "loss": 1.0705, + "num_input_tokens_seen": 151978676, + "step": 1203 + }, + { + "epoch": 0.3086000128262682, + "loss": 1.0444520711898804, + "loss_ce": 0.0005067629390396178, + "loss_iou": 0.49609375, + "loss_num": 0.0101318359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 151978676, + "step": 1203 + }, + { + "epoch": 0.3088565381902136, + "grad_norm": 73.17893981933594, + "learning_rate": 5e-06, + "loss": 1.2153, + "num_input_tokens_seen": 152106568, + "step": 1204 + }, + { + "epoch": 0.3088565381902136, + "loss": 1.2319436073303223, + "loss_ce": 0.001963050337508321, + "loss_iou": 0.5546875, + "loss_num": 0.024169921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 152106568, + "step": 1204 + }, + { + "epoch": 0.3091130635541589, + "grad_norm": 62.97467803955078, + "learning_rate": 5e-06, + "loss": 1.2845, + "num_input_tokens_seen": 152233644, + "step": 1205 + }, + { + "epoch": 0.3091130635541589, + "loss": 1.0797507762908936, + "loss_ce": 0.0011375478934496641, + "loss_iou": 0.50390625, + "loss_num": 0.0135498046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 152233644, + "step": 1205 + }, + { + "epoch": 0.3093695889181043, + "grad_norm": 39.59813690185547, + "learning_rate": 5e-06, + "loss": 1.1957, + "num_input_tokens_seen": 152359500, + "step": 1206 + }, + { + "epoch": 0.3093695889181043, + "loss": 1.037702202796936, + "loss_ce": 0.0010811197571456432, + "loss_iou": 0.48046875, + "loss_num": 0.01495361328125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 152359500, + "step": 1206 + }, + { + "epoch": 0.3096261142820496, + "grad_norm": 42.385833740234375, + "learning_rate": 5e-06, + "loss": 1.1144, + "num_input_tokens_seen": 152485672, + "step": 1207 + }, + { + "epoch": 0.3096261142820496, + "loss": 1.1238529682159424, + "loss_ce": 0.0012944028712809086, + "loss_iou": 0.5234375, + "loss_num": 0.0159912109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 152485672, + "step": 1207 + }, + { + "epoch": 0.309882639645995, + "grad_norm": 33.187007904052734, + "learning_rate": 5e-06, + "loss": 1.2727, + "num_input_tokens_seen": 152611156, + "step": 1208 + }, + { + "epoch": 0.309882639645995, + "loss": 1.3562543392181396, + "loss_ce": 0.005180067382752895, + "loss_iou": 0.62109375, + "loss_num": 0.0211181640625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 152611156, + "step": 1208 + }, + { + "epoch": 0.3101391650099404, + "grad_norm": 38.59255599975586, + "learning_rate": 5e-06, + "loss": 1.1779, + "num_input_tokens_seen": 152736564, + "step": 1209 + }, + { + "epoch": 0.3101391650099404, + "loss": 1.1265063285827637, + "loss_ce": 0.0015063012251630425, + "loss_iou": 0.5078125, + "loss_num": 0.021240234375, + "loss_xval": 1.125, + "num_input_tokens_seen": 152736564, + "step": 1209 + }, + { + "epoch": 0.3103956903738857, + "grad_norm": 61.07640075683594, + "learning_rate": 5e-06, + "loss": 1.2156, + "num_input_tokens_seen": 152862560, + "step": 1210 + }, + { + "epoch": 0.3103956903738857, + "loss": 1.118615984916687, + "loss_ce": 0.000940218917094171, + "loss_iou": 0.5078125, + "loss_num": 0.0196533203125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 152862560, + "step": 1210 + }, + { + "epoch": 0.3106522157378311, + "grad_norm": 49.71195983886719, + "learning_rate": 5e-06, + "loss": 1.2998, + "num_input_tokens_seen": 152989168, + "step": 1211 + }, + { + "epoch": 0.3106522157378311, + "loss": 1.4775209426879883, + "loss_ce": 0.007306137587875128, + "loss_iou": 0.66015625, + "loss_num": 0.030517578125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 152989168, + "step": 1211 + }, + { + "epoch": 0.31090874110177646, + "grad_norm": 34.205448150634766, + "learning_rate": 5e-06, + "loss": 1.2167, + "num_input_tokens_seen": 153116452, + "step": 1212 + }, + { + "epoch": 0.31090874110177646, + "loss": 1.3158230781555176, + "loss_ce": 0.003323036478832364, + "loss_iou": 0.58984375, + "loss_num": 0.0269775390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 153116452, + "step": 1212 + }, + { + "epoch": 0.3111652664657218, + "grad_norm": 49.828285217285156, + "learning_rate": 5e-06, + "loss": 1.2026, + "num_input_tokens_seen": 153244012, + "step": 1213 + }, + { + "epoch": 0.3111652664657218, + "loss": 1.1534075736999512, + "loss_ce": 0.0005755729507654905, + "loss_iou": 0.5390625, + "loss_num": 0.01446533203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 153244012, + "step": 1213 + }, + { + "epoch": 0.31142179182966717, + "grad_norm": 74.77185821533203, + "learning_rate": 5e-06, + "loss": 1.2916, + "num_input_tokens_seen": 153369640, + "step": 1214 + }, + { + "epoch": 0.31142179182966717, + "loss": 1.434908390045166, + "loss_ce": 0.001314537599682808, + "loss_iou": 0.6640625, + "loss_num": 0.0218505859375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 153369640, + "step": 1214 + }, + { + "epoch": 0.3116783171936125, + "grad_norm": 61.26490020751953, + "learning_rate": 5e-06, + "loss": 1.1936, + "num_input_tokens_seen": 153495356, + "step": 1215 + }, + { + "epoch": 0.3116783171936125, + "loss": 1.3405786752700806, + "loss_ce": 0.001711410004645586, + "loss_iou": 0.609375, + "loss_num": 0.0247802734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 153495356, + "step": 1215 + }, + { + "epoch": 0.3119348425575579, + "grad_norm": 71.84638214111328, + "learning_rate": 5e-06, + "loss": 1.2335, + "num_input_tokens_seen": 153621224, + "step": 1216 + }, + { + "epoch": 0.3119348425575579, + "loss": 1.0313420295715332, + "loss_ce": 0.0015569021925330162, + "loss_iou": 0.478515625, + "loss_num": 0.0142822265625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 153621224, + "step": 1216 + }, + { + "epoch": 0.31219136792150326, + "grad_norm": 57.38467025756836, + "learning_rate": 5e-06, + "loss": 1.3211, + "num_input_tokens_seen": 153747684, + "step": 1217 + }, + { + "epoch": 0.31219136792150326, + "loss": 1.3495609760284424, + "loss_ce": 0.004346134141087532, + "loss_iou": 0.62109375, + "loss_num": 0.0205078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 153747684, + "step": 1217 + }, + { + "epoch": 0.3124478932854486, + "grad_norm": 32.343544006347656, + "learning_rate": 5e-06, + "loss": 1.2674, + "num_input_tokens_seen": 153874400, + "step": 1218 + }, + { + "epoch": 0.3124478932854486, + "loss": 1.3661408424377441, + "loss_ce": 0.0009064363548532128, + "loss_iou": 0.6171875, + "loss_num": 0.026611328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 153874400, + "step": 1218 + }, + { + "epoch": 0.31270441864939397, + "grad_norm": 60.44613265991211, + "learning_rate": 5e-06, + "loss": 1.2019, + "num_input_tokens_seen": 154001664, + "step": 1219 + }, + { + "epoch": 0.31270441864939397, + "loss": 1.2785712480545044, + "loss_ce": 0.003668930847197771, + "loss_iou": 0.5703125, + "loss_num": 0.0263671875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 154001664, + "step": 1219 + }, + { + "epoch": 0.3129609440133393, + "grad_norm": 70.46881866455078, + "learning_rate": 5e-06, + "loss": 1.2548, + "num_input_tokens_seen": 154128136, + "step": 1220 + }, + { + "epoch": 0.3129609440133393, + "loss": 1.348661184310913, + "loss_ce": 0.0010049305856227875, + "loss_iou": 0.625, + "loss_num": 0.0194091796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 154128136, + "step": 1220 + }, + { + "epoch": 0.3132174693772847, + "grad_norm": 49.617431640625, + "learning_rate": 5e-06, + "loss": 1.1019, + "num_input_tokens_seen": 154254120, + "step": 1221 + }, + { + "epoch": 0.3132174693772847, + "loss": 1.1534950733184814, + "loss_ce": 0.0011513205245137215, + "loss_iou": 0.52734375, + "loss_num": 0.01904296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 154254120, + "step": 1221 + }, + { + "epoch": 0.31347399474123006, + "grad_norm": 53.56963348388672, + "learning_rate": 5e-06, + "loss": 1.2725, + "num_input_tokens_seen": 154381052, + "step": 1222 + }, + { + "epoch": 0.31347399474123006, + "loss": 1.111957311630249, + "loss_ce": 0.0011174108367413282, + "loss_iou": 0.5078125, + "loss_num": 0.019287109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 154381052, + "step": 1222 + }, + { + "epoch": 0.3137305201051754, + "grad_norm": 63.4941520690918, + "learning_rate": 5e-06, + "loss": 1.2187, + "num_input_tokens_seen": 154508132, + "step": 1223 + }, + { + "epoch": 0.3137305201051754, + "loss": 1.055586814880371, + "loss_ce": 0.0008993630763143301, + "loss_iou": 0.5, + "loss_num": 0.0101318359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 154508132, + "step": 1223 + }, + { + "epoch": 0.31398704546912076, + "grad_norm": 92.18440246582031, + "learning_rate": 5e-06, + "loss": 1.2043, + "num_input_tokens_seen": 154634868, + "step": 1224 + }, + { + "epoch": 0.31398704546912076, + "loss": 1.1712418794631958, + "loss_ce": 0.00034349842462688684, + "loss_iou": 0.546875, + "loss_num": 0.01611328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 154634868, + "step": 1224 + }, + { + "epoch": 0.31424357083306614, + "grad_norm": 52.418399810791016, + "learning_rate": 5e-06, + "loss": 1.2635, + "num_input_tokens_seen": 154761224, + "step": 1225 + }, + { + "epoch": 0.31424357083306614, + "loss": 1.2632883787155151, + "loss_ce": 0.0015696072950959206, + "loss_iou": 0.58984375, + "loss_num": 0.0169677734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 154761224, + "step": 1225 + }, + { + "epoch": 0.31450009619701147, + "grad_norm": 35.88551712036133, + "learning_rate": 5e-06, + "loss": 1.21, + "num_input_tokens_seen": 154888452, + "step": 1226 + }, + { + "epoch": 0.31450009619701147, + "loss": 1.17165207862854, + "loss_ce": 0.001241970108821988, + "loss_iou": 0.52734375, + "loss_num": 0.0234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 154888452, + "step": 1226 + }, + { + "epoch": 0.31475662156095685, + "grad_norm": 22.700986862182617, + "learning_rate": 5e-06, + "loss": 1.1412, + "num_input_tokens_seen": 155015804, + "step": 1227 + }, + { + "epoch": 0.31475662156095685, + "loss": 1.0853543281555176, + "loss_ce": 0.0033230185508728027, + "loss_iou": 0.49609375, + "loss_num": 0.017822265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 155015804, + "step": 1227 + }, + { + "epoch": 0.3150131469249022, + "grad_norm": 36.262229919433594, + "learning_rate": 5e-06, + "loss": 1.0805, + "num_input_tokens_seen": 155142876, + "step": 1228 + }, + { + "epoch": 0.3150131469249022, + "loss": 1.1358907222747803, + "loss_ce": 0.0011250171810388565, + "loss_iou": 0.53125, + "loss_num": 0.01495361328125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 155142876, + "step": 1228 + }, + { + "epoch": 0.31526967228884756, + "grad_norm": 61.85090255737305, + "learning_rate": 5e-06, + "loss": 1.2037, + "num_input_tokens_seen": 155270552, + "step": 1229 + }, + { + "epoch": 0.31526967228884756, + "loss": 1.3579082489013672, + "loss_ce": 0.0009746490977704525, + "loss_iou": 0.609375, + "loss_num": 0.02783203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 155270552, + "step": 1229 + }, + { + "epoch": 0.31552619765279294, + "grad_norm": 49.379093170166016, + "learning_rate": 5e-06, + "loss": 1.2888, + "num_input_tokens_seen": 155396964, + "step": 1230 + }, + { + "epoch": 0.31552619765279294, + "loss": 1.084566354751587, + "loss_ce": 0.000582003325689584, + "loss_iou": 0.498046875, + "loss_num": 0.0177001953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 155396964, + "step": 1230 + }, + { + "epoch": 0.31578272301673826, + "grad_norm": 23.049150466918945, + "learning_rate": 5e-06, + "loss": 1.2119, + "num_input_tokens_seen": 155522456, + "step": 1231 + }, + { + "epoch": 0.31578272301673826, + "loss": 1.3676241636276245, + "loss_ce": 0.0009249672293663025, + "loss_iou": 0.62890625, + "loss_num": 0.021728515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 155522456, + "step": 1231 + }, + { + "epoch": 0.31603924838068365, + "grad_norm": 37.24660873413086, + "learning_rate": 5e-06, + "loss": 1.2357, + "num_input_tokens_seen": 155649000, + "step": 1232 + }, + { + "epoch": 0.31603924838068365, + "loss": 1.2274184226989746, + "loss_ce": 0.0023206742480397224, + "loss_iou": 0.56640625, + "loss_num": 0.018798828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 155649000, + "step": 1232 + }, + { + "epoch": 0.316295773744629, + "grad_norm": 74.2628173828125, + "learning_rate": 5e-06, + "loss": 1.2428, + "num_input_tokens_seen": 155777076, + "step": 1233 + }, + { + "epoch": 0.316295773744629, + "loss": 1.421152114868164, + "loss_ce": 0.0007419618195854127, + "loss_iou": 0.64453125, + "loss_num": 0.026611328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 155777076, + "step": 1233 + }, + { + "epoch": 0.31655229910857435, + "grad_norm": 87.88998413085938, + "learning_rate": 5e-06, + "loss": 1.3826, + "num_input_tokens_seen": 155903808, + "step": 1234 + }, + { + "epoch": 0.31655229910857435, + "loss": 1.3346400260925293, + "loss_ce": 0.0006556602893397212, + "loss_iou": 0.609375, + "loss_num": 0.022705078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 155903808, + "step": 1234 + }, + { + "epoch": 0.31680882447251973, + "grad_norm": 33.18396759033203, + "learning_rate": 5e-06, + "loss": 1.1047, + "num_input_tokens_seen": 156029688, + "step": 1235 + }, + { + "epoch": 0.31680882447251973, + "loss": 1.1996961832046509, + "loss_ce": 0.003895382396876812, + "loss_iou": 0.546875, + "loss_num": 0.0206298828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 156029688, + "step": 1235 + }, + { + "epoch": 0.31706534983646506, + "grad_norm": 38.53822708129883, + "learning_rate": 5e-06, + "loss": 1.142, + "num_input_tokens_seen": 156156728, + "step": 1236 + }, + { + "epoch": 0.31706534983646506, + "loss": 1.159820795059204, + "loss_ce": 0.0011293981224298477, + "loss_iou": 0.53125, + "loss_num": 0.01904296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 156156728, + "step": 1236 + }, + { + "epoch": 0.31732187520041044, + "grad_norm": 99.89830780029297, + "learning_rate": 5e-06, + "loss": 1.1957, + "num_input_tokens_seen": 156283948, + "step": 1237 + }, + { + "epoch": 0.31732187520041044, + "loss": 1.2474554777145386, + "loss_ce": 0.00038512013270519674, + "loss_iou": 0.56640625, + "loss_num": 0.0234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 156283948, + "step": 1237 + }, + { + "epoch": 0.3175784005643558, + "grad_norm": 58.59158706665039, + "learning_rate": 5e-06, + "loss": 1.2775, + "num_input_tokens_seen": 156410556, + "step": 1238 + }, + { + "epoch": 0.3175784005643558, + "loss": 1.4197802543640137, + "loss_ce": 0.0013232952915132046, + "loss_iou": 0.65234375, + "loss_num": 0.0224609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 156410556, + "step": 1238 + }, + { + "epoch": 0.31783492592830115, + "grad_norm": 50.662818908691406, + "learning_rate": 5e-06, + "loss": 1.2505, + "num_input_tokens_seen": 156537060, + "step": 1239 + }, + { + "epoch": 0.31783492592830115, + "loss": 1.3356022834777832, + "loss_ce": 0.003082744777202606, + "loss_iou": 0.59765625, + "loss_num": 0.02783203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 156537060, + "step": 1239 + }, + { + "epoch": 0.31809145129224653, + "grad_norm": 37.37484359741211, + "learning_rate": 5e-06, + "loss": 1.0692, + "num_input_tokens_seen": 156663660, + "step": 1240 + }, + { + "epoch": 0.31809145129224653, + "loss": 1.0569722652435303, + "loss_ce": 0.005458524450659752, + "loss_iou": 0.474609375, + "loss_num": 0.0203857421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 156663660, + "step": 1240 + }, + { + "epoch": 0.31834797665619186, + "grad_norm": 102.17037200927734, + "learning_rate": 5e-06, + "loss": 1.2209, + "num_input_tokens_seen": 156790252, + "step": 1241 + }, + { + "epoch": 0.31834797665619186, + "loss": 1.1857203245162964, + "loss_ce": 0.001638320623897016, + "loss_iou": 0.55078125, + "loss_num": 0.0164794921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 156790252, + "step": 1241 + }, + { + "epoch": 0.31860450202013724, + "grad_norm": 58.911895751953125, + "learning_rate": 5e-06, + "loss": 1.36, + "num_input_tokens_seen": 156917896, + "step": 1242 + }, + { + "epoch": 0.31860450202013724, + "loss": 1.3864706754684448, + "loss_ce": 0.00024021565332077444, + "loss_iou": 0.6328125, + "loss_num": 0.0238037109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 156917896, + "step": 1242 + }, + { + "epoch": 0.3188610273840826, + "grad_norm": 46.78139877319336, + "learning_rate": 5e-06, + "loss": 1.3264, + "num_input_tokens_seen": 157045068, + "step": 1243 + }, + { + "epoch": 0.3188610273840826, + "loss": 1.793162226676941, + "loss_ce": 0.002146589569747448, + "loss_iou": 0.78515625, + "loss_num": 0.043212890625, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 157045068, + "step": 1243 + }, + { + "epoch": 0.31911755274802794, + "grad_norm": 29.730369567871094, + "learning_rate": 5e-06, + "loss": 1.1455, + "num_input_tokens_seen": 157171968, + "step": 1244 + }, + { + "epoch": 0.31911755274802794, + "loss": 1.131456732749939, + "loss_ce": 0.0020621963776648045, + "loss_iou": 0.52734375, + "loss_num": 0.01446533203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 157171968, + "step": 1244 + }, + { + "epoch": 0.3193740781119733, + "grad_norm": 77.9807357788086, + "learning_rate": 5e-06, + "loss": 1.0194, + "num_input_tokens_seen": 157299032, + "step": 1245 + }, + { + "epoch": 0.3193740781119733, + "loss": 0.9682940244674683, + "loss_ce": 0.0014971306081861258, + "loss_iou": 0.451171875, + "loss_num": 0.0128173828125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 157299032, + "step": 1245 + }, + { + "epoch": 0.3196306034759187, + "grad_norm": 48.1530647277832, + "learning_rate": 5e-06, + "loss": 1.2666, + "num_input_tokens_seen": 157425116, + "step": 1246 + }, + { + "epoch": 0.3196306034759187, + "loss": 1.2508158683776855, + "loss_ce": 0.0022806732449680567, + "loss_iou": 0.58203125, + "loss_num": 0.0166015625, + "loss_xval": 1.25, + "num_input_tokens_seen": 157425116, + "step": 1246 + }, + { + "epoch": 0.31988712883986403, + "grad_norm": 46.44908142089844, + "learning_rate": 5e-06, + "loss": 1.0781, + "num_input_tokens_seen": 157550620, + "step": 1247 + }, + { + "epoch": 0.31988712883986403, + "loss": 1.1350905895233154, + "loss_ce": 0.002278009196743369, + "loss_iou": 0.5234375, + "loss_num": 0.0174560546875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 157550620, + "step": 1247 + }, + { + "epoch": 0.3201436542038094, + "grad_norm": 45.566993713378906, + "learning_rate": 5e-06, + "loss": 1.3358, + "num_input_tokens_seen": 157677300, + "step": 1248 + }, + { + "epoch": 0.3201436542038094, + "loss": 1.2589658498764038, + "loss_ce": 0.002618188504129648, + "loss_iou": 0.58984375, + "loss_num": 0.01495361328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 157677300, + "step": 1248 + }, + { + "epoch": 0.32040017956775474, + "grad_norm": 56.907222747802734, + "learning_rate": 5e-06, + "loss": 1.0396, + "num_input_tokens_seen": 157803740, + "step": 1249 + }, + { + "epoch": 0.32040017956775474, + "loss": 0.7915816307067871, + "loss_ce": 0.0015425414312630892, + "loss_iou": 0.376953125, + "loss_num": 0.0072021484375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 157803740, + "step": 1249 + }, + { + "epoch": 0.3206567049317001, + "grad_norm": 97.7392807006836, + "learning_rate": 5e-06, + "loss": 1.2423, + "num_input_tokens_seen": 157930284, + "step": 1250 + }, + { + "epoch": 0.3206567049317001, + "eval_icons_CIoU": 0.07249780464917421, + "eval_icons_GIoU": 0.051892523964852444, + "eval_icons_IoU": 0.2341599464416504, + "eval_icons_MAE_all": 0.041067127138376236, + "eval_icons_MAE_h": 0.06431837379932404, + "eval_icons_MAE_w": 0.05282559059560299, + "eval_icons_MAE_x_boxes": 0.04309998266398907, + "eval_icons_MAE_y_boxes": 0.05791044794023037, + "eval_icons_NUM_probability": 0.9998266100883484, + "eval_icons_inside_bbox": 0.4357638955116272, + "eval_icons_loss": 2.0571579933166504, + "eval_icons_loss_ce": 0.00025336610997328535, + "eval_icons_loss_iou": 0.9228515625, + "eval_icons_loss_num": 0.045867919921875, + "eval_icons_loss_xval": 2.0751953125, + "eval_icons_runtime": 48.1488, + "eval_icons_samples_per_second": 1.038, + "eval_icons_steps_per_second": 0.042, + "num_input_tokens_seen": 157930284, + "step": 1250 + }, + { + "epoch": 0.3206567049317001, + "eval_screenspot_CIoU": 0.09262648721536, + "eval_screenspot_GIoU": 0.07646190685530503, + "eval_screenspot_IoU": 0.27494342625141144, + "eval_screenspot_MAE_all": 0.08039362480243047, + "eval_screenspot_MAE_h": 0.061564527451992035, + "eval_screenspot_MAE_w": 0.13922135531902313, + "eval_screenspot_MAE_x_boxes": 0.10712922116120656, + "eval_screenspot_MAE_y_boxes": 0.057735685259103775, + "eval_screenspot_NUM_probability": 0.9997729857762655, + "eval_screenspot_inside_bbox": 0.5674999952316284, + "eval_screenspot_loss": 2.295961856842041, + "eval_screenspot_loss_ce": 0.0023575947464754186, + "eval_screenspot_loss_iou": 0.9524739583333334, + "eval_screenspot_loss_num": 0.087677001953125, + "eval_screenspot_loss_xval": 2.3440755208333335, + "eval_screenspot_runtime": 92.9896, + "eval_screenspot_samples_per_second": 0.957, + "eval_screenspot_steps_per_second": 0.032, + "num_input_tokens_seen": 157930284, + "step": 1250 + }, + { + "epoch": 0.3206567049317001, + "loss": 2.281397819519043, + "loss_ce": 0.0021008620969951153, + "loss_iou": 0.93359375, + "loss_num": 0.08154296875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 157930284, + "step": 1250 + }, + { + "epoch": 0.3209132302956455, + "grad_norm": 49.13286209106445, + "learning_rate": 5e-06, + "loss": 1.3605, + "num_input_tokens_seen": 158056948, + "step": 1251 + }, + { + "epoch": 0.3209132302956455, + "loss": 1.4044119119644165, + "loss_ce": 0.0020682315807789564, + "loss_iou": 0.65625, + "loss_num": 0.0174560546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 158056948, + "step": 1251 + }, + { + "epoch": 0.3211697556595908, + "grad_norm": 22.003841400146484, + "learning_rate": 5e-06, + "loss": 1.1204, + "num_input_tokens_seen": 158183664, + "step": 1252 + }, + { + "epoch": 0.3211697556595908, + "loss": 1.1046695709228516, + "loss_ce": 0.001642246963456273, + "loss_iou": 0.50390625, + "loss_num": 0.0194091796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 158183664, + "step": 1252 + }, + { + "epoch": 0.3214262810235362, + "grad_norm": 38.642093658447266, + "learning_rate": 5e-06, + "loss": 1.1466, + "num_input_tokens_seen": 158310920, + "step": 1253 + }, + { + "epoch": 0.3214262810235362, + "loss": 1.2146748304367065, + "loss_ce": 0.0012959240702912211, + "loss_iou": 0.5703125, + "loss_num": 0.014404296875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 158310920, + "step": 1253 + }, + { + "epoch": 0.32168280638748153, + "grad_norm": 79.42029571533203, + "learning_rate": 5e-06, + "loss": 1.1508, + "num_input_tokens_seen": 158438064, + "step": 1254 + }, + { + "epoch": 0.32168280638748153, + "loss": 1.1083695888519287, + "loss_ce": 0.0019241985864937305, + "loss_iou": 0.5234375, + "loss_num": 0.01275634765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 158438064, + "step": 1254 + }, + { + "epoch": 0.3219393317514269, + "grad_norm": 59.38174819946289, + "learning_rate": 5e-06, + "loss": 1.1327, + "num_input_tokens_seen": 158564312, + "step": 1255 + }, + { + "epoch": 0.3219393317514269, + "loss": 0.9882862567901611, + "loss_ce": 0.001469826209358871, + "loss_iou": 0.46875, + "loss_num": 0.0098876953125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 158564312, + "step": 1255 + }, + { + "epoch": 0.3221958571153723, + "grad_norm": 53.7601432800293, + "learning_rate": 5e-06, + "loss": 1.182, + "num_input_tokens_seen": 158690956, + "step": 1256 + }, + { + "epoch": 0.3221958571153723, + "loss": 1.2844516038894653, + "loss_ce": 0.004178113769739866, + "loss_iou": 0.58203125, + "loss_num": 0.0233154296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 158690956, + "step": 1256 + }, + { + "epoch": 0.3224523824793176, + "grad_norm": 46.47795104980469, + "learning_rate": 5e-06, + "loss": 1.2916, + "num_input_tokens_seen": 158817132, + "step": 1257 + }, + { + "epoch": 0.3224523824793176, + "loss": 1.4170336723327637, + "loss_ce": 0.0015063219470903277, + "loss_iou": 0.64453125, + "loss_num": 0.0252685546875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 158817132, + "step": 1257 + }, + { + "epoch": 0.322708907843263, + "grad_norm": 51.43967819213867, + "learning_rate": 5e-06, + "loss": 1.1378, + "num_input_tokens_seen": 158943264, + "step": 1258 + }, + { + "epoch": 0.322708907843263, + "loss": 1.0961565971374512, + "loss_ce": 0.00045342050725594163, + "loss_iou": 0.50390625, + "loss_num": 0.01806640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 158943264, + "step": 1258 + }, + { + "epoch": 0.3229654332072084, + "grad_norm": 56.76911163330078, + "learning_rate": 5e-06, + "loss": 1.2516, + "num_input_tokens_seen": 159070256, + "step": 1259 + }, + { + "epoch": 0.3229654332072084, + "loss": 1.4198393821716309, + "loss_ce": 0.0057767960242927074, + "loss_iou": 0.64453125, + "loss_num": 0.024658203125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 159070256, + "step": 1259 + }, + { + "epoch": 0.3232219585711537, + "grad_norm": 52.00578308105469, + "learning_rate": 5e-06, + "loss": 1.1684, + "num_input_tokens_seen": 159195724, + "step": 1260 + }, + { + "epoch": 0.3232219585711537, + "loss": 1.0434730052947998, + "loss_ce": 0.0005042726988904178, + "loss_iou": 0.498046875, + "loss_num": 0.00927734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 159195724, + "step": 1260 + }, + { + "epoch": 0.3234784839350991, + "grad_norm": 75.0935287475586, + "learning_rate": 5e-06, + "loss": 1.2233, + "num_input_tokens_seen": 159321300, + "step": 1261 + }, + { + "epoch": 0.3234784839350991, + "loss": 1.0849721431732178, + "loss_ce": 0.0014760458143427968, + "loss_iou": 0.51953125, + "loss_num": 0.0091552734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 159321300, + "step": 1261 + }, + { + "epoch": 0.3237350092990444, + "grad_norm": 58.67608642578125, + "learning_rate": 5e-06, + "loss": 1.175, + "num_input_tokens_seen": 159446904, + "step": 1262 + }, + { + "epoch": 0.3237350092990444, + "loss": 1.0455586910247803, + "loss_ce": 0.0006368847680278122, + "loss_iou": 0.490234375, + "loss_num": 0.0128173828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 159446904, + "step": 1262 + }, + { + "epoch": 0.3239915346629898, + "grad_norm": 51.717247009277344, + "learning_rate": 5e-06, + "loss": 1.1815, + "num_input_tokens_seen": 159572288, + "step": 1263 + }, + { + "epoch": 0.3239915346629898, + "loss": 1.384324312210083, + "loss_ce": 0.004929807037115097, + "loss_iou": 0.61328125, + "loss_num": 0.0308837890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 159572288, + "step": 1263 + }, + { + "epoch": 0.3242480600269352, + "grad_norm": 67.50569915771484, + "learning_rate": 5e-06, + "loss": 1.286, + "num_input_tokens_seen": 159700324, + "step": 1264 + }, + { + "epoch": 0.3242480600269352, + "loss": 1.140454888343811, + "loss_ce": 0.0008064016001299024, + "loss_iou": 0.53125, + "loss_num": 0.0157470703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 159700324, + "step": 1264 + }, + { + "epoch": 0.3245045853908805, + "grad_norm": 84.49181365966797, + "learning_rate": 5e-06, + "loss": 1.1452, + "num_input_tokens_seen": 159826228, + "step": 1265 + }, + { + "epoch": 0.3245045853908805, + "loss": 1.0273422002792358, + "loss_ce": 0.0004867032985202968, + "loss_iou": 0.478515625, + "loss_num": 0.0135498046875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 159826228, + "step": 1265 + }, + { + "epoch": 0.3247611107548259, + "grad_norm": 46.9498405456543, + "learning_rate": 5e-06, + "loss": 1.2702, + "num_input_tokens_seen": 159951808, + "step": 1266 + }, + { + "epoch": 0.3247611107548259, + "loss": 1.376418113708496, + "loss_ce": 0.0009298399090766907, + "loss_iou": 0.62890625, + "loss_num": 0.023193359375, + "loss_xval": 1.375, + "num_input_tokens_seen": 159951808, + "step": 1266 + }, + { + "epoch": 0.32501763611877127, + "grad_norm": 40.754417419433594, + "learning_rate": 5e-06, + "loss": 1.2289, + "num_input_tokens_seen": 160078512, + "step": 1267 + }, + { + "epoch": 0.32501763611877127, + "loss": 1.1473593711853027, + "loss_ce": 0.002828042721375823, + "loss_iou": 0.51953125, + "loss_num": 0.0213623046875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 160078512, + "step": 1267 + }, + { + "epoch": 0.3252741614827166, + "grad_norm": 55.02046203613281, + "learning_rate": 5e-06, + "loss": 1.2602, + "num_input_tokens_seen": 160204528, + "step": 1268 + }, + { + "epoch": 0.3252741614827166, + "loss": 1.283548355102539, + "loss_ce": 0.0013217454543337226, + "loss_iou": 0.57421875, + "loss_num": 0.02734375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 160204528, + "step": 1268 + }, + { + "epoch": 0.325530686846662, + "grad_norm": 55.47435760498047, + "learning_rate": 5e-06, + "loss": 1.171, + "num_input_tokens_seen": 160330000, + "step": 1269 + }, + { + "epoch": 0.325530686846662, + "loss": 1.098606824874878, + "loss_ce": 0.0009504985064268112, + "loss_iou": 0.51171875, + "loss_num": 0.0147705078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 160330000, + "step": 1269 + }, + { + "epoch": 0.3257872122106073, + "grad_norm": 75.13670349121094, + "learning_rate": 5e-06, + "loss": 1.0311, + "num_input_tokens_seen": 160457704, + "step": 1270 + }, + { + "epoch": 0.3257872122106073, + "loss": 0.9925092458724976, + "loss_ce": 0.0008100575651042163, + "loss_iou": 0.466796875, + "loss_num": 0.011474609375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 160457704, + "step": 1270 + }, + { + "epoch": 0.3260437375745527, + "grad_norm": 49.65742874145508, + "learning_rate": 5e-06, + "loss": 1.2935, + "num_input_tokens_seen": 160583604, + "step": 1271 + }, + { + "epoch": 0.3260437375745527, + "loss": 1.3626670837402344, + "loss_ce": 0.0032921605743467808, + "loss_iou": 0.6171875, + "loss_num": 0.0255126953125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 160583604, + "step": 1271 + }, + { + "epoch": 0.32630026293849806, + "grad_norm": 44.61846923828125, + "learning_rate": 5e-06, + "loss": 1.066, + "num_input_tokens_seen": 160711032, + "step": 1272 + }, + { + "epoch": 0.32630026293849806, + "loss": 1.0957932472229004, + "loss_ce": 0.0025316495448350906, + "loss_iou": 0.5, + "loss_num": 0.0189208984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 160711032, + "step": 1272 + }, + { + "epoch": 0.3265567883024434, + "grad_norm": 56.59669876098633, + "learning_rate": 5e-06, + "loss": 1.1031, + "num_input_tokens_seen": 160838388, + "step": 1273 + }, + { + "epoch": 0.3265567883024434, + "loss": 1.1854407787322998, + "loss_ce": 0.0008704534848220646, + "loss_iou": 0.55859375, + "loss_num": 0.013671875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 160838388, + "step": 1273 + }, + { + "epoch": 0.32681331366638877, + "grad_norm": 47.14057922363281, + "learning_rate": 5e-06, + "loss": 1.1454, + "num_input_tokens_seen": 160964152, + "step": 1274 + }, + { + "epoch": 0.32681331366638877, + "loss": 1.0448768138885498, + "loss_ce": 0.007767427712678909, + "loss_iou": 0.46484375, + "loss_num": 0.021484375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 160964152, + "step": 1274 + }, + { + "epoch": 0.3270698390303341, + "grad_norm": 25.139583587646484, + "learning_rate": 5e-06, + "loss": 1.1345, + "num_input_tokens_seen": 161089668, + "step": 1275 + }, + { + "epoch": 0.3270698390303341, + "loss": 1.2592412233352661, + "loss_ce": 0.0038701368030160666, + "loss_iou": 0.55859375, + "loss_num": 0.0274658203125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 161089668, + "step": 1275 + }, + { + "epoch": 0.3273263643942795, + "grad_norm": 41.49599075317383, + "learning_rate": 5e-06, + "loss": 1.089, + "num_input_tokens_seen": 161215136, + "step": 1276 + }, + { + "epoch": 0.3273263643942795, + "loss": 1.1638729572296143, + "loss_ce": 0.000542897789273411, + "loss_iou": 0.54296875, + "loss_num": 0.01556396484375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 161215136, + "step": 1276 + }, + { + "epoch": 0.32758288975822486, + "grad_norm": 40.1270637512207, + "learning_rate": 5e-06, + "loss": 1.1918, + "num_input_tokens_seen": 161341232, + "step": 1277 + }, + { + "epoch": 0.32758288975822486, + "loss": 1.261702537536621, + "loss_ce": 0.0034017222933471203, + "loss_iou": 0.5703125, + "loss_num": 0.024169921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 161341232, + "step": 1277 + }, + { + "epoch": 0.3278394151221702, + "grad_norm": 62.625404357910156, + "learning_rate": 5e-06, + "loss": 1.1796, + "num_input_tokens_seen": 161468304, + "step": 1278 + }, + { + "epoch": 0.3278394151221702, + "loss": 1.2309691905975342, + "loss_ce": 0.004895085468888283, + "loss_iou": 0.55078125, + "loss_num": 0.02490234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 161468304, + "step": 1278 + }, + { + "epoch": 0.32809594048611557, + "grad_norm": 47.544281005859375, + "learning_rate": 5e-06, + "loss": 1.2608, + "num_input_tokens_seen": 161595512, + "step": 1279 + }, + { + "epoch": 0.32809594048611557, + "loss": 1.0017330646514893, + "loss_ce": 0.0002682343474589288, + "loss_iou": 0.4765625, + "loss_num": 0.00970458984375, + "loss_xval": 1.0, + "num_input_tokens_seen": 161595512, + "step": 1279 + }, + { + "epoch": 0.32835246585006095, + "grad_norm": 37.14018630981445, + "learning_rate": 5e-06, + "loss": 1.0989, + "num_input_tokens_seen": 161722260, + "step": 1280 + }, + { + "epoch": 0.32835246585006095, + "loss": 1.0397568941116333, + "loss_ce": 0.0006944277556613088, + "loss_iou": 0.482421875, + "loss_num": 0.01483154296875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 161722260, + "step": 1280 + }, + { + "epoch": 0.3286089912140063, + "grad_norm": 82.61949157714844, + "learning_rate": 5e-06, + "loss": 1.2128, + "num_input_tokens_seen": 161849356, + "step": 1281 + }, + { + "epoch": 0.3286089912140063, + "loss": 1.1757259368896484, + "loss_ce": 0.0038508926518261433, + "loss_iou": 0.5390625, + "loss_num": 0.0189208984375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 161849356, + "step": 1281 + }, + { + "epoch": 0.32886551657795166, + "grad_norm": 46.82399368286133, + "learning_rate": 5e-06, + "loss": 1.3612, + "num_input_tokens_seen": 161976004, + "step": 1282 + }, + { + "epoch": 0.32886551657795166, + "loss": 1.2931817770004272, + "loss_ce": 0.0007012529531493783, + "loss_iou": 0.6015625, + "loss_num": 0.0172119140625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 161976004, + "step": 1282 + }, + { + "epoch": 0.329122041941897, + "grad_norm": 41.240333557128906, + "learning_rate": 5e-06, + "loss": 1.1757, + "num_input_tokens_seen": 162101172, + "step": 1283 + }, + { + "epoch": 0.329122041941897, + "loss": 1.0424840450286865, + "loss_ce": 0.0009800756815820932, + "loss_iou": 0.48046875, + "loss_num": 0.015869140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 162101172, + "step": 1283 + }, + { + "epoch": 0.32937856730584236, + "grad_norm": 39.98911666870117, + "learning_rate": 5e-06, + "loss": 1.1001, + "num_input_tokens_seen": 162227364, + "step": 1284 + }, + { + "epoch": 0.32937856730584236, + "loss": 0.9344456791877747, + "loss_ce": 0.003293338231742382, + "loss_iou": 0.44921875, + "loss_num": 0.006622314453125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 162227364, + "step": 1284 + }, + { + "epoch": 0.32963509266978774, + "grad_norm": 64.64836883544922, + "learning_rate": 5e-06, + "loss": 1.1642, + "num_input_tokens_seen": 162354108, + "step": 1285 + }, + { + "epoch": 0.32963509266978774, + "loss": 0.9994192719459534, + "loss_ce": 0.00039583229226991534, + "loss_iou": 0.470703125, + "loss_num": 0.011962890625, + "loss_xval": 1.0, + "num_input_tokens_seen": 162354108, + "step": 1285 + }, + { + "epoch": 0.32989161803373307, + "grad_norm": 52.525047302246094, + "learning_rate": 5e-06, + "loss": 1.3096, + "num_input_tokens_seen": 162480556, + "step": 1286 + }, + { + "epoch": 0.32989161803373307, + "loss": 1.263479232788086, + "loss_ce": 0.0022488341201096773, + "loss_iou": 0.578125, + "loss_num": 0.0203857421875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 162480556, + "step": 1286 + }, + { + "epoch": 0.33014814339767845, + "grad_norm": 31.575300216674805, + "learning_rate": 5e-06, + "loss": 1.0925, + "num_input_tokens_seen": 162606876, + "step": 1287 + }, + { + "epoch": 0.33014814339767845, + "loss": 1.0394039154052734, + "loss_ce": 0.0018062794115394354, + "loss_iou": 0.46875, + "loss_num": 0.0194091796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 162606876, + "step": 1287 + }, + { + "epoch": 0.33040466876162383, + "grad_norm": 51.45674133300781, + "learning_rate": 5e-06, + "loss": 1.1673, + "num_input_tokens_seen": 162733600, + "step": 1288 + }, + { + "epoch": 0.33040466876162383, + "loss": 1.156322717666626, + "loss_ce": 0.0010491975117474794, + "loss_iou": 0.52734375, + "loss_num": 0.020751953125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 162733600, + "step": 1288 + }, + { + "epoch": 0.33066119412556916, + "grad_norm": 94.004150390625, + "learning_rate": 5e-06, + "loss": 1.1264, + "num_input_tokens_seen": 162861144, + "step": 1289 + }, + { + "epoch": 0.33066119412556916, + "loss": 1.1836069822311401, + "loss_ce": 0.0012340189423412085, + "loss_iou": 0.53125, + "loss_num": 0.0235595703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 162861144, + "step": 1289 + }, + { + "epoch": 0.33091771948951454, + "grad_norm": 61.09849166870117, + "learning_rate": 5e-06, + "loss": 1.3749, + "num_input_tokens_seen": 162987824, + "step": 1290 + }, + { + "epoch": 0.33091771948951454, + "loss": 1.4572348594665527, + "loss_ce": 0.001180208520963788, + "loss_iou": 0.6640625, + "loss_num": 0.0263671875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 162987824, + "step": 1290 + }, + { + "epoch": 0.33117424485345986, + "grad_norm": 36.687713623046875, + "learning_rate": 5e-06, + "loss": 1.09, + "num_input_tokens_seen": 163112908, + "step": 1291 + }, + { + "epoch": 0.33117424485345986, + "loss": 1.0988482236862183, + "loss_ce": 0.00070370570756495, + "loss_iou": 0.50390625, + "loss_num": 0.0185546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 163112908, + "step": 1291 + }, + { + "epoch": 0.33143077021740525, + "grad_norm": 70.69438934326172, + "learning_rate": 5e-06, + "loss": 1.1085, + "num_input_tokens_seen": 163238448, + "step": 1292 + }, + { + "epoch": 0.33143077021740525, + "loss": 0.9702016115188599, + "loss_ce": 0.0019399607554078102, + "loss_iou": 0.44921875, + "loss_num": 0.013671875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 163238448, + "step": 1292 + }, + { + "epoch": 0.3316872955813506, + "grad_norm": 57.450984954833984, + "learning_rate": 5e-06, + "loss": 1.1714, + "num_input_tokens_seen": 163364788, + "step": 1293 + }, + { + "epoch": 0.3316872955813506, + "loss": 1.23496675491333, + "loss_ce": 0.00449793878942728, + "loss_iou": 0.5625, + "loss_num": 0.0208740234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 163364788, + "step": 1293 + }, + { + "epoch": 0.33194382094529595, + "grad_norm": 43.01051330566406, + "learning_rate": 5e-06, + "loss": 1.1369, + "num_input_tokens_seen": 163490588, + "step": 1294 + }, + { + "epoch": 0.33194382094529595, + "loss": 1.2329139709472656, + "loss_ce": 0.003910066559910774, + "loss_iou": 0.55859375, + "loss_num": 0.0228271484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 163490588, + "step": 1294 + }, + { + "epoch": 0.33220034630924133, + "grad_norm": 61.832157135009766, + "learning_rate": 5e-06, + "loss": 1.2807, + "num_input_tokens_seen": 163615720, + "step": 1295 + }, + { + "epoch": 0.33220034630924133, + "loss": 1.2912871837615967, + "loss_ce": 0.001248168759047985, + "loss_iou": 0.5859375, + "loss_num": 0.023193359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 163615720, + "step": 1295 + }, + { + "epoch": 0.33245687167318666, + "grad_norm": 48.8303108215332, + "learning_rate": 5e-06, + "loss": 1.1549, + "num_input_tokens_seen": 163743344, + "step": 1296 + }, + { + "epoch": 0.33245687167318666, + "loss": 1.1412134170532227, + "loss_ce": 0.0015649141278117895, + "loss_iou": 0.53125, + "loss_num": 0.0162353515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 163743344, + "step": 1296 + }, + { + "epoch": 0.33271339703713204, + "grad_norm": 72.43038177490234, + "learning_rate": 5e-06, + "loss": 1.0591, + "num_input_tokens_seen": 163869880, + "step": 1297 + }, + { + "epoch": 0.33271339703713204, + "loss": 1.0622000694274902, + "loss_ce": 0.0011650202795863152, + "loss_iou": 0.50390625, + "loss_num": 0.010498046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 163869880, + "step": 1297 + }, + { + "epoch": 0.3329699224010774, + "grad_norm": 55.02589416503906, + "learning_rate": 5e-06, + "loss": 1.3604, + "num_input_tokens_seen": 163995688, + "step": 1298 + }, + { + "epoch": 0.3329699224010774, + "loss": 1.3565266132354736, + "loss_ce": 0.0054524801671504974, + "loss_iou": 0.6171875, + "loss_num": 0.0240478515625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 163995688, + "step": 1298 + }, + { + "epoch": 0.33322644776502275, + "grad_norm": 32.55293273925781, + "learning_rate": 5e-06, + "loss": 1.1037, + "num_input_tokens_seen": 164121796, + "step": 1299 + }, + { + "epoch": 0.33322644776502275, + "loss": 1.0788705348968506, + "loss_ce": 0.0007456161547452211, + "loss_iou": 0.50390625, + "loss_num": 0.01446533203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 164121796, + "step": 1299 + }, + { + "epoch": 0.33348297312896813, + "grad_norm": 46.604618072509766, + "learning_rate": 5e-06, + "loss": 1.2442, + "num_input_tokens_seen": 164247464, + "step": 1300 + }, + { + "epoch": 0.33348297312896813, + "loss": 1.1920204162597656, + "loss_ce": 0.00305565120652318, + "loss_iou": 0.53125, + "loss_num": 0.025390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 164247464, + "step": 1300 + }, + { + "epoch": 0.3337394984929135, + "grad_norm": 59.683128356933594, + "learning_rate": 5e-06, + "loss": 1.154, + "num_input_tokens_seen": 164374484, + "step": 1301 + }, + { + "epoch": 0.3337394984929135, + "loss": 1.1062514781951904, + "loss_ce": 0.0022474948782473803, + "loss_iou": 0.51171875, + "loss_num": 0.01556396484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 164374484, + "step": 1301 + }, + { + "epoch": 0.33399602385685884, + "grad_norm": 84.57758331298828, + "learning_rate": 5e-06, + "loss": 1.2092, + "num_input_tokens_seen": 164500856, + "step": 1302 + }, + { + "epoch": 0.33399602385685884, + "loss": 1.2464287281036377, + "loss_ce": 0.0008232389809563756, + "loss_iou": 0.58984375, + "loss_num": 0.0126953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 164500856, + "step": 1302 + }, + { + "epoch": 0.3342525492208042, + "grad_norm": 49.97148132324219, + "learning_rate": 5e-06, + "loss": 1.3578, + "num_input_tokens_seen": 164626968, + "step": 1303 + }, + { + "epoch": 0.3342525492208042, + "loss": 1.3367893695831299, + "loss_ce": 0.0008519052062183619, + "loss_iou": 0.609375, + "loss_num": 0.022705078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 164626968, + "step": 1303 + }, + { + "epoch": 0.33450907458474954, + "grad_norm": 34.75997543334961, + "learning_rate": 5e-06, + "loss": 1.1853, + "num_input_tokens_seen": 164752744, + "step": 1304 + }, + { + "epoch": 0.33450907458474954, + "loss": 1.1197948455810547, + "loss_ce": 0.001142518362030387, + "loss_iou": 0.515625, + "loss_num": 0.01806640625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 164752744, + "step": 1304 + }, + { + "epoch": 0.3347655999486949, + "grad_norm": 50.69468688964844, + "learning_rate": 5e-06, + "loss": 1.0847, + "num_input_tokens_seen": 164878968, + "step": 1305 + }, + { + "epoch": 0.3347655999486949, + "loss": 1.1378719806671143, + "loss_ce": 0.0006649799179285765, + "loss_iou": 0.53125, + "loss_num": 0.01531982421875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 164878968, + "step": 1305 + }, + { + "epoch": 0.3350221253126403, + "grad_norm": 92.58582305908203, + "learning_rate": 5e-06, + "loss": 1.2589, + "num_input_tokens_seen": 165004864, + "step": 1306 + }, + { + "epoch": 0.3350221253126403, + "loss": 0.9723803997039795, + "loss_ce": 0.000700663193129003, + "loss_iou": 0.466796875, + "loss_num": 0.00799560546875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 165004864, + "step": 1306 + }, + { + "epoch": 0.33527865067658563, + "grad_norm": 51.65283203125, + "learning_rate": 5e-06, + "loss": 1.2906, + "num_input_tokens_seen": 165131496, + "step": 1307 + }, + { + "epoch": 0.33527865067658563, + "loss": 1.15885591506958, + "loss_ce": 0.001141174347139895, + "loss_iou": 0.5390625, + "loss_num": 0.016357421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 165131496, + "step": 1307 + }, + { + "epoch": 0.335535176040531, + "grad_norm": 65.71267700195312, + "learning_rate": 5e-06, + "loss": 1.0923, + "num_input_tokens_seen": 165258852, + "step": 1308 + }, + { + "epoch": 0.335535176040531, + "loss": 1.016379952430725, + "loss_ce": 0.005149520002305508, + "loss_iou": 0.4609375, + "loss_num": 0.0181884765625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 165258852, + "step": 1308 + }, + { + "epoch": 0.3357917014044764, + "grad_norm": 71.03826141357422, + "learning_rate": 5e-06, + "loss": 1.2708, + "num_input_tokens_seen": 165384548, + "step": 1309 + }, + { + "epoch": 0.3357917014044764, + "loss": 1.233874797821045, + "loss_ce": 0.0014529803302139044, + "loss_iou": 0.578125, + "loss_num": 0.01507568359375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 165384548, + "step": 1309 + }, + { + "epoch": 0.3360482267684217, + "grad_norm": 49.2921028137207, + "learning_rate": 5e-06, + "loss": 1.3836, + "num_input_tokens_seen": 165510276, + "step": 1310 + }, + { + "epoch": 0.3360482267684217, + "loss": 1.41239333152771, + "loss_ce": 0.0007722391746938229, + "loss_iou": 0.640625, + "loss_num": 0.0257568359375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 165510276, + "step": 1310 + }, + { + "epoch": 0.3363047521323671, + "grad_norm": 35.518699645996094, + "learning_rate": 5e-06, + "loss": 1.0468, + "num_input_tokens_seen": 165636464, + "step": 1311 + }, + { + "epoch": 0.3363047521323671, + "loss": 1.0874557495117188, + "loss_ce": 0.0020066231954842806, + "loss_iou": 0.4921875, + "loss_num": 0.020263671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 165636464, + "step": 1311 + }, + { + "epoch": 0.3365612774963124, + "grad_norm": 51.436161041259766, + "learning_rate": 5e-06, + "loss": 1.1029, + "num_input_tokens_seen": 165763244, + "step": 1312 + }, + { + "epoch": 0.3365612774963124, + "loss": 1.0276087522506714, + "loss_ce": 0.0007533463649451733, + "loss_iou": 0.482421875, + "loss_num": 0.01287841796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 165763244, + "step": 1312 + }, + { + "epoch": 0.3368178028602578, + "grad_norm": 54.61701202392578, + "learning_rate": 5e-06, + "loss": 1.2783, + "num_input_tokens_seen": 165888648, + "step": 1313 + }, + { + "epoch": 0.3368178028602578, + "loss": 1.3299647569656372, + "loss_ce": 0.0013514950405806303, + "loss_iou": 0.609375, + "loss_num": 0.0216064453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 165888648, + "step": 1313 + }, + { + "epoch": 0.3370743282242032, + "grad_norm": 59.59455490112305, + "learning_rate": 5e-06, + "loss": 1.2099, + "num_input_tokens_seen": 166015336, + "step": 1314 + }, + { + "epoch": 0.3370743282242032, + "loss": 1.163291096687317, + "loss_ce": 0.0006934672128409147, + "loss_iou": 0.5234375, + "loss_num": 0.0234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 166015336, + "step": 1314 + }, + { + "epoch": 0.3373308535881485, + "grad_norm": 62.63290786743164, + "learning_rate": 5e-06, + "loss": 1.1165, + "num_input_tokens_seen": 166141704, + "step": 1315 + }, + { + "epoch": 0.3373308535881485, + "loss": 1.0286238193511963, + "loss_ce": 0.0007917361799627542, + "loss_iou": 0.484375, + "loss_num": 0.01141357421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 166141704, + "step": 1315 + }, + { + "epoch": 0.3375873789520939, + "grad_norm": 58.727325439453125, + "learning_rate": 5e-06, + "loss": 1.2586, + "num_input_tokens_seen": 166268904, + "step": 1316 + }, + { + "epoch": 0.3375873789520939, + "loss": 1.2499585151672363, + "loss_ce": 0.0009350709151476622, + "loss_iou": 0.578125, + "loss_num": 0.0186767578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 166268904, + "step": 1316 + }, + { + "epoch": 0.3378439043160392, + "grad_norm": 63.83665084838867, + "learning_rate": 5e-06, + "loss": 1.1569, + "num_input_tokens_seen": 166394472, + "step": 1317 + }, + { + "epoch": 0.3378439043160392, + "loss": 1.1275815963745117, + "loss_ce": 0.0016050159465521574, + "loss_iou": 0.515625, + "loss_num": 0.0186767578125, + "loss_xval": 1.125, + "num_input_tokens_seen": 166394472, + "step": 1317 + }, + { + "epoch": 0.3381004296799846, + "grad_norm": 42.83293533325195, + "learning_rate": 5e-06, + "loss": 1.1778, + "num_input_tokens_seen": 166520728, + "step": 1318 + }, + { + "epoch": 0.3381004296799846, + "loss": 1.210350513458252, + "loss_ce": 0.006737162824720144, + "loss_iou": 0.5625, + "loss_num": 0.01611328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 166520728, + "step": 1318 + }, + { + "epoch": 0.33835695504393, + "grad_norm": 41.9317741394043, + "learning_rate": 5e-06, + "loss": 1.2298, + "num_input_tokens_seen": 166646788, + "step": 1319 + }, + { + "epoch": 0.33835695504393, + "loss": 1.227984070777893, + "loss_ce": 0.004839526489377022, + "loss_iou": 0.546875, + "loss_num": 0.02587890625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 166646788, + "step": 1319 + }, + { + "epoch": 0.3386134804078753, + "grad_norm": 44.55878829956055, + "learning_rate": 5e-06, + "loss": 1.0729, + "num_input_tokens_seen": 166772436, + "step": 1320 + }, + { + "epoch": 0.3386134804078753, + "loss": 1.0969736576080322, + "loss_ce": 0.0017587259644642472, + "loss_iou": 0.5, + "loss_num": 0.0181884765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 166772436, + "step": 1320 + }, + { + "epoch": 0.3388700057718207, + "grad_norm": 63.980838775634766, + "learning_rate": 5e-06, + "loss": 1.1151, + "num_input_tokens_seen": 166898972, + "step": 1321 + }, + { + "epoch": 0.3388700057718207, + "loss": 1.1305232048034668, + "loss_ce": 0.0011286857770755887, + "loss_iou": 0.5234375, + "loss_num": 0.0169677734375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 166898972, + "step": 1321 + }, + { + "epoch": 0.3391265311357661, + "grad_norm": 52.205997467041016, + "learning_rate": 5e-06, + "loss": 1.2111, + "num_input_tokens_seen": 167024664, + "step": 1322 + }, + { + "epoch": 0.3391265311357661, + "loss": 1.470261573791504, + "loss_ce": 0.0024880755227059126, + "loss_iou": 0.65625, + "loss_num": 0.031494140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 167024664, + "step": 1322 + }, + { + "epoch": 0.3393830564997114, + "grad_norm": 47.83674240112305, + "learning_rate": 5e-06, + "loss": 1.0788, + "num_input_tokens_seen": 167151860, + "step": 1323 + }, + { + "epoch": 0.3393830564997114, + "loss": 0.9936881065368652, + "loss_ce": 0.0034537434112280607, + "loss_iou": 0.466796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 167151860, + "step": 1323 + }, + { + "epoch": 0.3396395818636568, + "grad_norm": 37.30731964111328, + "learning_rate": 5e-06, + "loss": 1.0329, + "num_input_tokens_seen": 167279068, + "step": 1324 + }, + { + "epoch": 0.3396395818636568, + "loss": 1.0647060871124268, + "loss_ce": 0.005624149460345507, + "loss_iou": 0.486328125, + "loss_num": 0.0169677734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 167279068, + "step": 1324 + }, + { + "epoch": 0.3398961072276021, + "grad_norm": 49.90718460083008, + "learning_rate": 5e-06, + "loss": 1.059, + "num_input_tokens_seen": 167405884, + "step": 1325 + }, + { + "epoch": 0.3398961072276021, + "loss": 1.002687692642212, + "loss_ce": 0.0007345692720264196, + "loss_iou": 0.4609375, + "loss_num": 0.0157470703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 167405884, + "step": 1325 + }, + { + "epoch": 0.3401526325915475, + "grad_norm": 49.875885009765625, + "learning_rate": 5e-06, + "loss": 1.173, + "num_input_tokens_seen": 167531808, + "step": 1326 + }, + { + "epoch": 0.3401526325915475, + "loss": 1.176804780960083, + "loss_ce": 0.0024883763398975134, + "loss_iou": 0.54296875, + "loss_num": 0.0179443359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 167531808, + "step": 1326 + }, + { + "epoch": 0.34040915795549287, + "grad_norm": 69.873291015625, + "learning_rate": 5e-06, + "loss": 1.3702, + "num_input_tokens_seen": 167658400, + "step": 1327 + }, + { + "epoch": 0.34040915795549287, + "loss": 1.226730465888977, + "loss_ce": 0.0021210263948887587, + "loss_iou": 0.57421875, + "loss_num": 0.01446533203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 167658400, + "step": 1327 + }, + { + "epoch": 0.3406656833194382, + "grad_norm": 50.65689468383789, + "learning_rate": 5e-06, + "loss": 1.2498, + "num_input_tokens_seen": 167784240, + "step": 1328 + }, + { + "epoch": 0.3406656833194382, + "loss": 1.133530855178833, + "loss_ce": 0.0012067120987921953, + "loss_iou": 0.53125, + "loss_num": 0.0142822265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 167784240, + "step": 1328 + }, + { + "epoch": 0.3409222086833836, + "grad_norm": 44.50754165649414, + "learning_rate": 5e-06, + "loss": 1.1497, + "num_input_tokens_seen": 167910944, + "step": 1329 + }, + { + "epoch": 0.3409222086833836, + "loss": 1.3607301712036133, + "loss_ce": 0.004284847527742386, + "loss_iou": 0.59765625, + "loss_num": 0.032470703125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 167910944, + "step": 1329 + }, + { + "epoch": 0.3411787340473289, + "grad_norm": 45.09111785888672, + "learning_rate": 5e-06, + "loss": 1.1775, + "num_input_tokens_seen": 168036852, + "step": 1330 + }, + { + "epoch": 0.3411787340473289, + "loss": 0.9884587526321411, + "loss_ce": 0.0006657983758486807, + "loss_iou": 0.466796875, + "loss_num": 0.0111083984375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 168036852, + "step": 1330 + }, + { + "epoch": 0.3414352594112743, + "grad_norm": 58.9768180847168, + "learning_rate": 5e-06, + "loss": 1.1181, + "num_input_tokens_seen": 168163240, + "step": 1331 + }, + { + "epoch": 0.3414352594112743, + "loss": 1.1302016973495483, + "loss_ce": 0.0012954032281413674, + "loss_iou": 0.53125, + "loss_num": 0.013427734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 168163240, + "step": 1331 + }, + { + "epoch": 0.34169178477521966, + "grad_norm": 54.49114227294922, + "learning_rate": 5e-06, + "loss": 1.3087, + "num_input_tokens_seen": 168289304, + "step": 1332 + }, + { + "epoch": 0.34169178477521966, + "loss": 1.2778449058532715, + "loss_ce": 0.0014777167234569788, + "loss_iou": 0.58984375, + "loss_num": 0.019775390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 168289304, + "step": 1332 + }, + { + "epoch": 0.341948310139165, + "grad_norm": 63.94794464111328, + "learning_rate": 5e-06, + "loss": 1.089, + "num_input_tokens_seen": 168415044, + "step": 1333 + }, + { + "epoch": 0.341948310139165, + "loss": 1.0782265663146973, + "loss_ce": 0.00254302890971303, + "loss_iou": 0.50390625, + "loss_num": 0.0128173828125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 168415044, + "step": 1333 + }, + { + "epoch": 0.34220483550311037, + "grad_norm": 43.618343353271484, + "learning_rate": 5e-06, + "loss": 1.2661, + "num_input_tokens_seen": 168540384, + "step": 1334 + }, + { + "epoch": 0.34220483550311037, + "loss": 1.4584414958953857, + "loss_ce": 0.0014102550921961665, + "loss_iou": 0.6484375, + "loss_num": 0.03271484375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 168540384, + "step": 1334 + }, + { + "epoch": 0.34246136086705575, + "grad_norm": 34.72237014770508, + "learning_rate": 5e-06, + "loss": 1.1261, + "num_input_tokens_seen": 168667540, + "step": 1335 + }, + { + "epoch": 0.34246136086705575, + "loss": 1.0935579538345337, + "loss_ce": 0.0007845707004889846, + "loss_iou": 0.50390625, + "loss_num": 0.0167236328125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 168667540, + "step": 1335 + }, + { + "epoch": 0.3427178862310011, + "grad_norm": 31.859899520874023, + "learning_rate": 5e-06, + "loss": 1.1939, + "num_input_tokens_seen": 168793620, + "step": 1336 + }, + { + "epoch": 0.3427178862310011, + "loss": 1.2027677297592163, + "loss_ce": 0.0006193062290549278, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 168793620, + "step": 1336 + }, + { + "epoch": 0.34297441159494646, + "grad_norm": 49.95622253417969, + "learning_rate": 5e-06, + "loss": 1.0998, + "num_input_tokens_seen": 168918612, + "step": 1337 + }, + { + "epoch": 0.34297441159494646, + "loss": 1.0581738948822021, + "loss_ce": 0.000556750048417598, + "loss_iou": 0.484375, + "loss_num": 0.018310546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 168918612, + "step": 1337 + }, + { + "epoch": 0.3432309369588918, + "grad_norm": 57.08076858520508, + "learning_rate": 5e-06, + "loss": 1.1881, + "num_input_tokens_seen": 169043920, + "step": 1338 + }, + { + "epoch": 0.3432309369588918, + "loss": 1.1711890697479248, + "loss_ce": 0.0002906341396737844, + "loss_iou": 0.53515625, + "loss_num": 0.01953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 169043920, + "step": 1338 + }, + { + "epoch": 0.34348746232283717, + "grad_norm": 51.76689529418945, + "learning_rate": 5e-06, + "loss": 1.2305, + "num_input_tokens_seen": 169170172, + "step": 1339 + }, + { + "epoch": 0.34348746232283717, + "loss": 1.1652652025222778, + "loss_ce": 0.001691012759692967, + "loss_iou": 0.51953125, + "loss_num": 0.0242919921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 169170172, + "step": 1339 + }, + { + "epoch": 0.34374398768678255, + "grad_norm": 168.36090087890625, + "learning_rate": 5e-06, + "loss": 1.2061, + "num_input_tokens_seen": 169296032, + "step": 1340 + }, + { + "epoch": 0.34374398768678255, + "loss": 1.1418664455413818, + "loss_ce": 0.0007531064911745489, + "loss_iou": 0.53125, + "loss_num": 0.0164794921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 169296032, + "step": 1340 + }, + { + "epoch": 0.3440005130507279, + "grad_norm": 51.47685241699219, + "learning_rate": 5e-06, + "loss": 1.3233, + "num_input_tokens_seen": 169422704, + "step": 1341 + }, + { + "epoch": 0.3440005130507279, + "loss": 1.3653734922409058, + "loss_ce": 0.0006274158367887139, + "loss_iou": 0.6171875, + "loss_num": 0.0262451171875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 169422704, + "step": 1341 + }, + { + "epoch": 0.34425703841467326, + "grad_norm": 32.98845291137695, + "learning_rate": 5e-06, + "loss": 1.0941, + "num_input_tokens_seen": 169548652, + "step": 1342 + }, + { + "epoch": 0.34425703841467326, + "loss": 1.0560872554779053, + "loss_ce": 0.0021322641987353563, + "loss_iou": 0.474609375, + "loss_num": 0.020751953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 169548652, + "step": 1342 + }, + { + "epoch": 0.34451356377861864, + "grad_norm": 48.99879837036133, + "learning_rate": 5e-06, + "loss": 1.1142, + "num_input_tokens_seen": 169675520, + "step": 1343 + }, + { + "epoch": 0.34451356377861864, + "loss": 1.01632821559906, + "loss_ce": 0.0026564113795757294, + "loss_iou": 0.47265625, + "loss_num": 0.0140380859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 169675520, + "step": 1343 + }, + { + "epoch": 0.34477008914256396, + "grad_norm": 41.69797897338867, + "learning_rate": 5e-06, + "loss": 1.1738, + "num_input_tokens_seen": 169801548, + "step": 1344 + }, + { + "epoch": 0.34477008914256396, + "loss": 1.2061631679534912, + "loss_ce": 0.003038189373910427, + "loss_iou": 0.5546875, + "loss_num": 0.0185546875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 169801548, + "step": 1344 + }, + { + "epoch": 0.34502661450650934, + "grad_norm": 45.8388786315918, + "learning_rate": 5e-06, + "loss": 1.1243, + "num_input_tokens_seen": 169927600, + "step": 1345 + }, + { + "epoch": 0.34502661450650934, + "loss": 1.1806461811065674, + "loss_ce": 0.002911780495196581, + "loss_iou": 0.52734375, + "loss_num": 0.0242919921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 169927600, + "step": 1345 + }, + { + "epoch": 0.34528313987045467, + "grad_norm": 53.046443939208984, + "learning_rate": 5e-06, + "loss": 1.1763, + "num_input_tokens_seen": 170053520, + "step": 1346 + }, + { + "epoch": 0.34528313987045467, + "loss": 1.1936041116714478, + "loss_ce": 0.003662701463326812, + "loss_iou": 0.546875, + "loss_num": 0.0184326171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 170053520, + "step": 1346 + }, + { + "epoch": 0.34553966523440005, + "grad_norm": 103.20085906982422, + "learning_rate": 5e-06, + "loss": 1.1791, + "num_input_tokens_seen": 170179596, + "step": 1347 + }, + { + "epoch": 0.34553966523440005, + "loss": 1.1439260244369507, + "loss_ce": 0.002812773222103715, + "loss_iou": 0.5234375, + "loss_num": 0.0181884765625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 170179596, + "step": 1347 + }, + { + "epoch": 0.34579619059834543, + "grad_norm": 51.921688079833984, + "learning_rate": 5e-06, + "loss": 1.3816, + "num_input_tokens_seen": 170305820, + "step": 1348 + }, + { + "epoch": 0.34579619059834543, + "loss": 1.381309986114502, + "loss_ce": 0.0024037775583565235, + "loss_iou": 0.62890625, + "loss_num": 0.0240478515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 170305820, + "step": 1348 + }, + { + "epoch": 0.34605271596229076, + "grad_norm": 50.30708312988281, + "learning_rate": 5e-06, + "loss": 1.139, + "num_input_tokens_seen": 170432604, + "step": 1349 + }, + { + "epoch": 0.34605271596229076, + "loss": 0.9168659448623657, + "loss_ce": 0.0018268261337652802, + "loss_iou": 0.42578125, + "loss_num": 0.01239013671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 170432604, + "step": 1349 + }, + { + "epoch": 0.34630924132623614, + "grad_norm": 69.55729675292969, + "learning_rate": 5e-06, + "loss": 1.106, + "num_input_tokens_seen": 170559592, + "step": 1350 + }, + { + "epoch": 0.34630924132623614, + "loss": 0.8677027225494385, + "loss_ce": 0.0005152039811946452, + "loss_iou": 0.416015625, + "loss_num": 0.00677490234375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 170559592, + "step": 1350 + }, + { + "epoch": 0.34656576669018146, + "grad_norm": 59.65449523925781, + "learning_rate": 5e-06, + "loss": 1.2311, + "num_input_tokens_seen": 170687448, + "step": 1351 + }, + { + "epoch": 0.34656576669018146, + "loss": 1.334472417831421, + "loss_ce": 0.001464664819650352, + "loss_iou": 0.625, + "loss_num": 0.0169677734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 170687448, + "step": 1351 + }, + { + "epoch": 0.34682229205412685, + "grad_norm": 39.04547119140625, + "learning_rate": 5e-06, + "loss": 1.2128, + "num_input_tokens_seen": 170812508, + "step": 1352 + }, + { + "epoch": 0.34682229205412685, + "loss": 1.2865699529647827, + "loss_ce": 0.001413637539371848, + "loss_iou": 0.55859375, + "loss_num": 0.033447265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 170812508, + "step": 1352 + }, + { + "epoch": 0.3470788174180722, + "grad_norm": 48.79114532470703, + "learning_rate": 5e-06, + "loss": 1.1511, + "num_input_tokens_seen": 170938424, + "step": 1353 + }, + { + "epoch": 0.3470788174180722, + "loss": 1.0621917247772217, + "loss_ce": 0.0016447447706013918, + "loss_iou": 0.498046875, + "loss_num": 0.01324462890625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 170938424, + "step": 1353 + }, + { + "epoch": 0.34733534278201755, + "grad_norm": 72.50634002685547, + "learning_rate": 5e-06, + "loss": 1.1681, + "num_input_tokens_seen": 171064376, + "step": 1354 + }, + { + "epoch": 0.34733534278201755, + "loss": 1.0580462217330933, + "loss_ce": 0.00238218205049634, + "loss_iou": 0.5, + "loss_num": 0.0103759765625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 171064376, + "step": 1354 + }, + { + "epoch": 0.34759186814596293, + "grad_norm": 60.18556594848633, + "learning_rate": 5e-06, + "loss": 1.1269, + "num_input_tokens_seen": 171192020, + "step": 1355 + }, + { + "epoch": 0.34759186814596293, + "loss": 0.9656658172607422, + "loss_ce": 0.000822105910629034, + "loss_iou": 0.443359375, + "loss_num": 0.015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 171192020, + "step": 1355 + }, + { + "epoch": 0.3478483935099083, + "grad_norm": 71.22262573242188, + "learning_rate": 5e-06, + "loss": 1.1078, + "num_input_tokens_seen": 171318580, + "step": 1356 + }, + { + "epoch": 0.3478483935099083, + "loss": 1.1020182371139526, + "loss_ce": 0.002897149883210659, + "loss_iou": 0.51171875, + "loss_num": 0.01531982421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 171318580, + "step": 1356 + }, + { + "epoch": 0.34810491887385364, + "grad_norm": 47.881080627441406, + "learning_rate": 5e-06, + "loss": 1.2823, + "num_input_tokens_seen": 171444244, + "step": 1357 + }, + { + "epoch": 0.34810491887385364, + "loss": 1.242171287536621, + "loss_ce": 0.003401759546250105, + "loss_iou": 0.56640625, + "loss_num": 0.02099609375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 171444244, + "step": 1357 + }, + { + "epoch": 0.348361444237799, + "grad_norm": 44.870113372802734, + "learning_rate": 5e-06, + "loss": 1.0988, + "num_input_tokens_seen": 171571080, + "step": 1358 + }, + { + "epoch": 0.348361444237799, + "loss": 1.1715128421783447, + "loss_ce": 0.007938620634377003, + "loss_iou": 0.53125, + "loss_num": 0.020263671875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 171571080, + "step": 1358 + }, + { + "epoch": 0.34861796960174435, + "grad_norm": 54.634620666503906, + "learning_rate": 5e-06, + "loss": 1.1312, + "num_input_tokens_seen": 171698600, + "step": 1359 + }, + { + "epoch": 0.34861796960174435, + "loss": 1.0396037101745605, + "loss_ce": 0.0005412074970081449, + "loss_iou": 0.48828125, + "loss_num": 0.01239013671875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 171698600, + "step": 1359 + }, + { + "epoch": 0.34887449496568973, + "grad_norm": 43.267513275146484, + "learning_rate": 5e-06, + "loss": 1.0817, + "num_input_tokens_seen": 171824272, + "step": 1360 + }, + { + "epoch": 0.34887449496568973, + "loss": 0.9208498597145081, + "loss_ce": 0.0009279837249778211, + "loss_iou": 0.43359375, + "loss_num": 0.01080322265625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 171824272, + "step": 1360 + }, + { + "epoch": 0.3491310203296351, + "grad_norm": 30.21150779724121, + "learning_rate": 5e-06, + "loss": 1.192, + "num_input_tokens_seen": 171951056, + "step": 1361 + }, + { + "epoch": 0.3491310203296351, + "loss": 1.253859519958496, + "loss_ce": 0.0014181847218424082, + "loss_iou": 0.5703125, + "loss_num": 0.022705078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 171951056, + "step": 1361 + }, + { + "epoch": 0.34938754569358044, + "grad_norm": 42.033233642578125, + "learning_rate": 5e-06, + "loss": 1.0594, + "num_input_tokens_seen": 172078172, + "step": 1362 + }, + { + "epoch": 0.34938754569358044, + "loss": 0.8543832302093506, + "loss_ce": 0.00037935556611046195, + "loss_iou": 0.40625, + "loss_num": 0.00799560546875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 172078172, + "step": 1362 + }, + { + "epoch": 0.3496440710575258, + "grad_norm": 55.845298767089844, + "learning_rate": 5e-06, + "loss": 1.1287, + "num_input_tokens_seen": 172204364, + "step": 1363 + }, + { + "epoch": 0.3496440710575258, + "loss": 1.1958149671554565, + "loss_ce": 0.0005024656420573592, + "loss_iou": 0.55859375, + "loss_num": 0.0155029296875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 172204364, + "step": 1363 + }, + { + "epoch": 0.3499005964214712, + "grad_norm": 96.85700225830078, + "learning_rate": 5e-06, + "loss": 1.152, + "num_input_tokens_seen": 172331100, + "step": 1364 + }, + { + "epoch": 0.3499005964214712, + "loss": 0.944640040397644, + "loss_ce": 0.00030413639615289867, + "loss_iou": 0.455078125, + "loss_num": 0.00665283203125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 172331100, + "step": 1364 + }, + { + "epoch": 0.3501571217854165, + "grad_norm": 44.4818115234375, + "learning_rate": 5e-06, + "loss": 1.2995, + "num_input_tokens_seen": 172457212, + "step": 1365 + }, + { + "epoch": 0.3501571217854165, + "loss": 1.1931391954421997, + "loss_ce": 0.0017329109832644463, + "loss_iou": 0.5625, + "loss_num": 0.01385498046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 172457212, + "step": 1365 + }, + { + "epoch": 0.3504136471493619, + "grad_norm": 45.176918029785156, + "learning_rate": 5e-06, + "loss": 1.1547, + "num_input_tokens_seen": 172583236, + "step": 1366 + }, + { + "epoch": 0.3504136471493619, + "loss": 1.1660646200180054, + "loss_ce": 0.0024903868325054646, + "loss_iou": 0.515625, + "loss_num": 0.02685546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 172583236, + "step": 1366 + }, + { + "epoch": 0.35067017251330723, + "grad_norm": 36.068519592285156, + "learning_rate": 5e-06, + "loss": 1.147, + "num_input_tokens_seen": 172709544, + "step": 1367 + }, + { + "epoch": 0.35067017251330723, + "loss": 1.2256264686584473, + "loss_ce": 0.001993701793253422, + "loss_iou": 0.56640625, + "loss_num": 0.0185546875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 172709544, + "step": 1367 + }, + { + "epoch": 0.3509266978772526, + "grad_norm": 75.27877044677734, + "learning_rate": 5e-06, + "loss": 1.0456, + "num_input_tokens_seen": 172837176, + "step": 1368 + }, + { + "epoch": 0.3509266978772526, + "loss": 1.131980538368225, + "loss_ce": 0.0006328823510557413, + "loss_iou": 0.51953125, + "loss_num": 0.0189208984375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 172837176, + "step": 1368 + }, + { + "epoch": 0.351183223241198, + "grad_norm": 46.12670135498047, + "learning_rate": 5e-06, + "loss": 1.3464, + "num_input_tokens_seen": 172963636, + "step": 1369 + }, + { + "epoch": 0.351183223241198, + "loss": 1.3854703903198242, + "loss_ce": 0.0007048420375213027, + "loss_iou": 0.63671875, + "loss_num": 0.0220947265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 172963636, + "step": 1369 + }, + { + "epoch": 0.3514397486051433, + "grad_norm": 33.23619079589844, + "learning_rate": 5e-06, + "loss": 1.1803, + "num_input_tokens_seen": 173089652, + "step": 1370 + }, + { + "epoch": 0.3514397486051433, + "loss": 1.1686625480651855, + "loss_ce": 0.00020550224871840328, + "loss_iou": 0.54296875, + "loss_num": 0.01611328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 173089652, + "step": 1370 + }, + { + "epoch": 0.3516962739690887, + "grad_norm": 37.84006118774414, + "learning_rate": 5e-06, + "loss": 1.1203, + "num_input_tokens_seen": 173217296, + "step": 1371 + }, + { + "epoch": 0.3516962739690887, + "loss": 1.1790614128112793, + "loss_ce": 0.004744932986795902, + "loss_iou": 0.5390625, + "loss_num": 0.02001953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 173217296, + "step": 1371 + }, + { + "epoch": 0.351952799333034, + "grad_norm": 46.94552230834961, + "learning_rate": 5e-06, + "loss": 1.2198, + "num_input_tokens_seen": 173343112, + "step": 1372 + }, + { + "epoch": 0.351952799333034, + "loss": 1.356769323348999, + "loss_ce": 0.006183353252708912, + "loss_iou": 0.578125, + "loss_num": 0.0380859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 173343112, + "step": 1372 + }, + { + "epoch": 0.3522093246969794, + "grad_norm": 84.84690856933594, + "learning_rate": 5e-06, + "loss": 1.1954, + "num_input_tokens_seen": 173468948, + "step": 1373 + }, + { + "epoch": 0.3522093246969794, + "loss": 1.1856458187103271, + "loss_ce": 0.0020520584657788277, + "loss_iou": 0.53125, + "loss_num": 0.024658203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 173468948, + "step": 1373 + }, + { + "epoch": 0.3524658500609248, + "grad_norm": 46.36500549316406, + "learning_rate": 5e-06, + "loss": 1.2977, + "num_input_tokens_seen": 173595860, + "step": 1374 + }, + { + "epoch": 0.3524658500609248, + "loss": 1.244938611984253, + "loss_ce": 0.0012861847644671798, + "loss_iou": 0.5703125, + "loss_num": 0.0205078125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 173595860, + "step": 1374 + }, + { + "epoch": 0.3527223754248701, + "grad_norm": 36.35874557495117, + "learning_rate": 5e-06, + "loss": 1.1501, + "num_input_tokens_seen": 173722584, + "step": 1375 + }, + { + "epoch": 0.3527223754248701, + "loss": 1.112770915031433, + "loss_ce": 0.004372420255094767, + "loss_iou": 0.515625, + "loss_num": 0.01544189453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 173722584, + "step": 1375 + }, + { + "epoch": 0.3529789007888155, + "grad_norm": 61.23054885864258, + "learning_rate": 5e-06, + "loss": 0.9976, + "num_input_tokens_seen": 173849740, + "step": 1376 + }, + { + "epoch": 0.3529789007888155, + "loss": 0.9914021492004395, + "loss_ce": 0.00019115865870844573, + "loss_iou": 0.453125, + "loss_num": 0.016845703125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 173849740, + "step": 1376 + }, + { + "epoch": 0.3532354261527609, + "grad_norm": 56.992774963378906, + "learning_rate": 5e-06, + "loss": 1.3138, + "num_input_tokens_seen": 173977428, + "step": 1377 + }, + { + "epoch": 0.3532354261527609, + "loss": 1.2920899391174316, + "loss_ce": 0.004003926645964384, + "loss_iou": 0.578125, + "loss_num": 0.026123046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 173977428, + "step": 1377 + }, + { + "epoch": 0.3534919515167062, + "grad_norm": 43.95704650878906, + "learning_rate": 5e-06, + "loss": 1.1033, + "num_input_tokens_seen": 174104608, + "step": 1378 + }, + { + "epoch": 0.3534919515167062, + "loss": 1.1377105712890625, + "loss_ce": 0.0009918669238686562, + "loss_iou": 0.5234375, + "loss_num": 0.017578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 174104608, + "step": 1378 + }, + { + "epoch": 0.3537484768806516, + "grad_norm": 52.715999603271484, + "learning_rate": 5e-06, + "loss": 1.1251, + "num_input_tokens_seen": 174230556, + "step": 1379 + }, + { + "epoch": 0.3537484768806516, + "loss": 1.0484061241149902, + "loss_ce": 0.0005546641768887639, + "loss_iou": 0.494140625, + "loss_num": 0.01177978515625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 174230556, + "step": 1379 + }, + { + "epoch": 0.3540050022445969, + "grad_norm": 84.34625244140625, + "learning_rate": 5e-06, + "loss": 1.1601, + "num_input_tokens_seen": 174357904, + "step": 1380 + }, + { + "epoch": 0.3540050022445969, + "loss": 0.9797146320343018, + "loss_ce": 0.0033962265588343143, + "loss_iou": 0.447265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 174357904, + "step": 1380 + }, + { + "epoch": 0.3542615276085423, + "grad_norm": 50.631744384765625, + "learning_rate": 5e-06, + "loss": 1.3826, + "num_input_tokens_seen": 174484976, + "step": 1381 + }, + { + "epoch": 0.3542615276085423, + "loss": 1.5988245010375977, + "loss_ce": 0.002633117139339447, + "loss_iou": 0.6953125, + "loss_num": 0.040771484375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 174484976, + "step": 1381 + }, + { + "epoch": 0.3545180529724877, + "grad_norm": 54.359886169433594, + "learning_rate": 5e-06, + "loss": 1.0438, + "num_input_tokens_seen": 174611000, + "step": 1382 + }, + { + "epoch": 0.3545180529724877, + "loss": 0.9444396495819092, + "loss_ce": 0.0005920346011407673, + "loss_iou": 0.44921875, + "loss_num": 0.0087890625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 174611000, + "step": 1382 + }, + { + "epoch": 0.354774578336433, + "grad_norm": 74.90353393554688, + "learning_rate": 5e-06, + "loss": 1.1648, + "num_input_tokens_seen": 174737032, + "step": 1383 + }, + { + "epoch": 0.354774578336433, + "loss": 1.1236404180526733, + "loss_ce": 0.0010818042792379856, + "loss_iou": 0.52734375, + "loss_num": 0.01416015625, + "loss_xval": 1.125, + "num_input_tokens_seen": 174737032, + "step": 1383 + }, + { + "epoch": 0.3550311037003784, + "grad_norm": 55.336910247802734, + "learning_rate": 5e-06, + "loss": 1.219, + "num_input_tokens_seen": 174865076, + "step": 1384 + }, + { + "epoch": 0.3550311037003784, + "loss": 1.269698143005371, + "loss_ce": 0.002120112767443061, + "loss_iou": 0.58203125, + "loss_num": 0.020751953125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 174865076, + "step": 1384 + }, + { + "epoch": 0.35528762906432376, + "grad_norm": 27.341716766357422, + "learning_rate": 5e-06, + "loss": 1.0849, + "num_input_tokens_seen": 174990928, + "step": 1385 + }, + { + "epoch": 0.35528762906432376, + "loss": 0.9743456840515137, + "loss_ce": 0.0031542396172881126, + "loss_iou": 0.443359375, + "loss_num": 0.0172119140625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 174990928, + "step": 1385 + }, + { + "epoch": 0.3555441544282691, + "grad_norm": 48.25249099731445, + "learning_rate": 5e-06, + "loss": 1.0641, + "num_input_tokens_seen": 175118028, + "step": 1386 + }, + { + "epoch": 0.3555441544282691, + "loss": 1.0575331449508667, + "loss_ce": 0.0018691245932132006, + "loss_iou": 0.484375, + "loss_num": 0.0177001953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 175118028, + "step": 1386 + }, + { + "epoch": 0.35580067979221447, + "grad_norm": 79.824951171875, + "learning_rate": 5e-06, + "loss": 1.1924, + "num_input_tokens_seen": 175244648, + "step": 1387 + }, + { + "epoch": 0.35580067979221447, + "loss": 1.30496346950531, + "loss_ce": 0.0027173494454473257, + "loss_iou": 0.59375, + "loss_num": 0.0235595703125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 175244648, + "step": 1387 + }, + { + "epoch": 0.3560572051561598, + "grad_norm": 44.5381965637207, + "learning_rate": 5e-06, + "loss": 1.2708, + "num_input_tokens_seen": 175371784, + "step": 1388 + }, + { + "epoch": 0.3560572051561598, + "loss": 1.2244952917099, + "loss_ce": 0.0013507817639037967, + "loss_iou": 0.55859375, + "loss_num": 0.0208740234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 175371784, + "step": 1388 + }, + { + "epoch": 0.3563137305201052, + "grad_norm": 27.7333984375, + "learning_rate": 5e-06, + "loss": 1.1295, + "num_input_tokens_seen": 175496948, + "step": 1389 + }, + { + "epoch": 0.3563137305201052, + "loss": 1.335802435874939, + "loss_ce": 0.0027946592308580875, + "loss_iou": 0.59765625, + "loss_num": 0.0277099609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 175496948, + "step": 1389 + }, + { + "epoch": 0.35657025588405056, + "grad_norm": 60.578983306884766, + "learning_rate": 5e-06, + "loss": 1.1142, + "num_input_tokens_seen": 175623460, + "step": 1390 + }, + { + "epoch": 0.35657025588405056, + "loss": 1.2890775203704834, + "loss_ce": 0.0024563451297581196, + "loss_iou": 0.59765625, + "loss_num": 0.0185546875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 175623460, + "step": 1390 + }, + { + "epoch": 0.3568267812479959, + "grad_norm": 57.77640914916992, + "learning_rate": 5e-06, + "loss": 1.2095, + "num_input_tokens_seen": 175751028, + "step": 1391 + }, + { + "epoch": 0.3568267812479959, + "loss": 1.2185487747192383, + "loss_ce": 0.0002870266616810113, + "loss_iou": 0.578125, + "loss_num": 0.011962890625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 175751028, + "step": 1391 + }, + { + "epoch": 0.35708330661194126, + "grad_norm": 38.928531646728516, + "learning_rate": 5e-06, + "loss": 1.1525, + "num_input_tokens_seen": 175878328, + "step": 1392 + }, + { + "epoch": 0.35708330661194126, + "loss": 1.3805654048919678, + "loss_ce": 0.00849514827132225, + "loss_iou": 0.61328125, + "loss_num": 0.02978515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 175878328, + "step": 1392 + }, + { + "epoch": 0.3573398319758866, + "grad_norm": 52.213321685791016, + "learning_rate": 5e-06, + "loss": 1.112, + "num_input_tokens_seen": 176004192, + "step": 1393 + }, + { + "epoch": 0.3573398319758866, + "loss": 1.0061274766921997, + "loss_ce": 0.0017329610418528318, + "loss_iou": 0.4765625, + "loss_num": 0.0101318359375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 176004192, + "step": 1393 + }, + { + "epoch": 0.35759635733983197, + "grad_norm": 84.93548583984375, + "learning_rate": 5e-06, + "loss": 1.1438, + "num_input_tokens_seen": 176132076, + "step": 1394 + }, + { + "epoch": 0.35759635733983197, + "loss": 1.0486780405044556, + "loss_ce": 0.0008263917407020926, + "loss_iou": 0.4921875, + "loss_num": 0.01300048828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 176132076, + "step": 1394 + }, + { + "epoch": 0.35785288270377735, + "grad_norm": 47.76504898071289, + "learning_rate": 5e-06, + "loss": 1.2714, + "num_input_tokens_seen": 176258876, + "step": 1395 + }, + { + "epoch": 0.35785288270377735, + "loss": 1.28211510181427, + "loss_ce": 0.0008651985554024577, + "loss_iou": 0.5859375, + "loss_num": 0.021240234375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 176258876, + "step": 1395 + }, + { + "epoch": 0.3581094080677227, + "grad_norm": 42.5472412109375, + "learning_rate": 5e-06, + "loss": 1.0582, + "num_input_tokens_seen": 176385112, + "step": 1396 + }, + { + "epoch": 0.3581094080677227, + "loss": 1.2524986267089844, + "loss_ce": 0.008357943035662174, + "loss_iou": 0.5859375, + "loss_num": 0.014404296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 176385112, + "step": 1396 + }, + { + "epoch": 0.35836593343166806, + "grad_norm": 65.12969970703125, + "learning_rate": 5e-06, + "loss": 1.1449, + "num_input_tokens_seen": 176511944, + "step": 1397 + }, + { + "epoch": 0.35836593343166806, + "loss": 1.2545161247253418, + "loss_ce": 0.002563050016760826, + "loss_iou": 0.5703125, + "loss_num": 0.0225830078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 176511944, + "step": 1397 + }, + { + "epoch": 0.35862245879561344, + "grad_norm": 52.363250732421875, + "learning_rate": 5e-06, + "loss": 1.2255, + "num_input_tokens_seen": 176637872, + "step": 1398 + }, + { + "epoch": 0.35862245879561344, + "loss": 1.2409348487854004, + "loss_ce": 0.0007004392682574689, + "loss_iou": 0.58203125, + "loss_num": 0.0157470703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 176637872, + "step": 1398 + }, + { + "epoch": 0.35887898415955877, + "grad_norm": 31.810760498046875, + "learning_rate": 5e-06, + "loss": 1.1042, + "num_input_tokens_seen": 176765072, + "step": 1399 + }, + { + "epoch": 0.35887898415955877, + "loss": 1.396734595298767, + "loss_ce": 0.0007385142962448299, + "loss_iou": 0.62109375, + "loss_num": 0.0303955078125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 176765072, + "step": 1399 + }, + { + "epoch": 0.35913550952350415, + "grad_norm": 49.842411041259766, + "learning_rate": 5e-06, + "loss": 1.2, + "num_input_tokens_seen": 176891992, + "step": 1400 + }, + { + "epoch": 0.35913550952350415, + "loss": 1.3048043251037598, + "loss_ce": 0.0020699079614132643, + "loss_iou": 0.59765625, + "loss_num": 0.021728515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 176891992, + "step": 1400 + }, + { + "epoch": 0.3593920348874495, + "grad_norm": 65.3194808959961, + "learning_rate": 5e-06, + "loss": 1.2217, + "num_input_tokens_seen": 177017268, + "step": 1401 + }, + { + "epoch": 0.3593920348874495, + "loss": 1.2708327770233154, + "loss_ce": 0.0022780802100896835, + "loss_iou": 0.58203125, + "loss_num": 0.0206298828125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 177017268, + "step": 1401 + }, + { + "epoch": 0.35964856025139486, + "grad_norm": 46.705814361572266, + "learning_rate": 5e-06, + "loss": 1.1345, + "num_input_tokens_seen": 177143312, + "step": 1402 + }, + { + "epoch": 0.35964856025139486, + "loss": 1.2016682624816895, + "loss_ce": 0.005867491010576487, + "loss_iou": 0.546875, + "loss_num": 0.0205078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 177143312, + "step": 1402 + }, + { + "epoch": 0.35990508561534024, + "grad_norm": 41.54037094116211, + "learning_rate": 5e-06, + "loss": 1.1446, + "num_input_tokens_seen": 177269796, + "step": 1403 + }, + { + "epoch": 0.35990508561534024, + "loss": 1.2335948944091797, + "loss_ce": 0.0011730333790183067, + "loss_iou": 0.56640625, + "loss_num": 0.0191650390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 177269796, + "step": 1403 + }, + { + "epoch": 0.36016161097928556, + "grad_norm": 53.14521789550781, + "learning_rate": 5e-06, + "loss": 1.1874, + "num_input_tokens_seen": 177395980, + "step": 1404 + }, + { + "epoch": 0.36016161097928556, + "loss": 1.1850682497024536, + "loss_ce": 0.0024511157535016537, + "loss_iou": 0.5546875, + "loss_num": 0.01513671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 177395980, + "step": 1404 + }, + { + "epoch": 0.36041813634323094, + "grad_norm": 70.36585998535156, + "learning_rate": 5e-06, + "loss": 1.1684, + "num_input_tokens_seen": 177522296, + "step": 1405 + }, + { + "epoch": 0.36041813634323094, + "loss": 1.150983214378357, + "loss_ce": 0.001080854912288487, + "loss_iou": 0.54296875, + "loss_num": 0.013671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 177522296, + "step": 1405 + }, + { + "epoch": 0.36067466170717627, + "grad_norm": 51.59445571899414, + "learning_rate": 5e-06, + "loss": 1.1342, + "num_input_tokens_seen": 177648232, + "step": 1406 + }, + { + "epoch": 0.36067466170717627, + "loss": 1.0994153022766113, + "loss_ce": 0.00029418812482617795, + "loss_iou": 0.5234375, + "loss_num": 0.0096435546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 177648232, + "step": 1406 + }, + { + "epoch": 0.36093118707112165, + "grad_norm": 45.08783721923828, + "learning_rate": 5e-06, + "loss": 1.1008, + "num_input_tokens_seen": 177775300, + "step": 1407 + }, + { + "epoch": 0.36093118707112165, + "loss": 1.0431565046310425, + "loss_ce": 0.0021409427281469107, + "loss_iou": 0.4765625, + "loss_num": 0.017578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 177775300, + "step": 1407 + }, + { + "epoch": 0.36118771243506703, + "grad_norm": 53.53173065185547, + "learning_rate": 5e-06, + "loss": 1.1146, + "num_input_tokens_seen": 177901932, + "step": 1408 + }, + { + "epoch": 0.36118771243506703, + "loss": 1.0472989082336426, + "loss_ce": 0.00042388561996631324, + "loss_iou": 0.478515625, + "loss_num": 0.0181884765625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 177901932, + "step": 1408 + }, + { + "epoch": 0.36144423779901236, + "grad_norm": 61.29420852661133, + "learning_rate": 5e-06, + "loss": 1.2221, + "num_input_tokens_seen": 178028816, + "step": 1409 + }, + { + "epoch": 0.36144423779901236, + "loss": 1.178318738937378, + "loss_ce": 0.002049183938652277, + "loss_iou": 0.5390625, + "loss_num": 0.0201416015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 178028816, + "step": 1409 + }, + { + "epoch": 0.36170076316295774, + "grad_norm": 77.30718231201172, + "learning_rate": 5e-06, + "loss": 1.0734, + "num_input_tokens_seen": 178153960, + "step": 1410 + }, + { + "epoch": 0.36170076316295774, + "loss": 1.0187435150146484, + "loss_ce": 0.0011654456611722708, + "loss_iou": 0.48828125, + "loss_num": 0.0079345703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 178153960, + "step": 1410 + }, + { + "epoch": 0.3619572885269031, + "grad_norm": 53.55356979370117, + "learning_rate": 5e-06, + "loss": 1.2644, + "num_input_tokens_seen": 178280760, + "step": 1411 + }, + { + "epoch": 0.3619572885269031, + "loss": 1.337110996246338, + "loss_ce": 0.0026382647920399904, + "loss_iou": 0.61328125, + "loss_num": 0.0213623046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 178280760, + "step": 1411 + }, + { + "epoch": 0.36221381389084845, + "grad_norm": 25.140792846679688, + "learning_rate": 5e-06, + "loss": 1.1195, + "num_input_tokens_seen": 178408192, + "step": 1412 + }, + { + "epoch": 0.36221381389084845, + "loss": 1.3948564529418945, + "loss_ce": 0.0017901384271681309, + "loss_iou": 0.609375, + "loss_num": 0.035400390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 178408192, + "step": 1412 + }, + { + "epoch": 0.3624703392547938, + "grad_norm": 36.74555587768555, + "learning_rate": 5e-06, + "loss": 1.0843, + "num_input_tokens_seen": 178534148, + "step": 1413 + }, + { + "epoch": 0.3624703392547938, + "loss": 0.9681083559989929, + "loss_ce": 0.0003349077596794814, + "loss_iou": 0.4609375, + "loss_num": 0.0093994140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 178534148, + "step": 1413 + }, + { + "epoch": 0.36272686461873915, + "grad_norm": 39.646759033203125, + "learning_rate": 5e-06, + "loss": 1.1566, + "num_input_tokens_seen": 178660364, + "step": 1414 + }, + { + "epoch": 0.36272686461873915, + "loss": 1.0544712543487549, + "loss_ce": 0.0012486418709158897, + "loss_iou": 0.49609375, + "loss_num": 0.012451171875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 178660364, + "step": 1414 + }, + { + "epoch": 0.36298338998268453, + "grad_norm": 51.174739837646484, + "learning_rate": 5e-06, + "loss": 1.0304, + "num_input_tokens_seen": 178786120, + "step": 1415 + }, + { + "epoch": 0.36298338998268453, + "loss": 0.9179519414901733, + "loss_ce": 0.0004714433162007481, + "loss_iou": 0.443359375, + "loss_num": 0.00634765625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 178786120, + "step": 1415 + }, + { + "epoch": 0.3632399153466299, + "grad_norm": 85.84087371826172, + "learning_rate": 5e-06, + "loss": 1.1992, + "num_input_tokens_seen": 178911904, + "step": 1416 + }, + { + "epoch": 0.3632399153466299, + "loss": 1.1173064708709717, + "loss_ce": 0.0010955582838505507, + "loss_iou": 0.515625, + "loss_num": 0.0164794921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 178911904, + "step": 1416 + }, + { + "epoch": 0.36349644071057524, + "grad_norm": 48.12837600708008, + "learning_rate": 5e-06, + "loss": 1.2909, + "num_input_tokens_seen": 179038012, + "step": 1417 + }, + { + "epoch": 0.36349644071057524, + "loss": 1.0293456315994263, + "loss_ce": 0.0005370373837649822, + "loss_iou": 0.484375, + "loss_num": 0.01239013671875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 179038012, + "step": 1417 + }, + { + "epoch": 0.3637529660745206, + "grad_norm": 36.64936828613281, + "learning_rate": 5e-06, + "loss": 1.1515, + "num_input_tokens_seen": 179165140, + "step": 1418 + }, + { + "epoch": 0.3637529660745206, + "loss": 1.2027003765106201, + "loss_ce": 0.00592296477407217, + "loss_iou": 0.55078125, + "loss_num": 0.01953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 179165140, + "step": 1418 + }, + { + "epoch": 0.364009491438466, + "grad_norm": 61.948516845703125, + "learning_rate": 5e-06, + "loss": 1.0665, + "num_input_tokens_seen": 179290444, + "step": 1419 + }, + { + "epoch": 0.364009491438466, + "loss": 1.0884134769439697, + "loss_ce": 0.002964199986308813, + "loss_iou": 0.486328125, + "loss_num": 0.022705078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 179290444, + "step": 1419 + }, + { + "epoch": 0.36426601680241133, + "grad_norm": 57.032588958740234, + "learning_rate": 5e-06, + "loss": 1.2553, + "num_input_tokens_seen": 179416604, + "step": 1420 + }, + { + "epoch": 0.36426601680241133, + "loss": 1.245171308517456, + "loss_ce": 0.0005423245020210743, + "loss_iou": 0.57421875, + "loss_num": 0.019287109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 179416604, + "step": 1420 + }, + { + "epoch": 0.3645225421663567, + "grad_norm": 51.570472717285156, + "learning_rate": 5e-06, + "loss": 1.129, + "num_input_tokens_seen": 179541908, + "step": 1421 + }, + { + "epoch": 0.3645225421663567, + "loss": 1.0779342651367188, + "loss_ce": 0.002250690246000886, + "loss_iou": 0.498046875, + "loss_num": 0.0159912109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 179541908, + "step": 1421 + }, + { + "epoch": 0.36477906753030204, + "grad_norm": 48.223907470703125, + "learning_rate": 5e-06, + "loss": 1.0895, + "num_input_tokens_seen": 179667500, + "step": 1422 + }, + { + "epoch": 0.36477906753030204, + "loss": 1.132366418838501, + "loss_ce": 0.0019953905139118433, + "loss_iou": 0.5234375, + "loss_num": 0.0157470703125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 179667500, + "step": 1422 + }, + { + "epoch": 0.3650355928942474, + "grad_norm": 58.864295959472656, + "learning_rate": 5e-06, + "loss": 1.1271, + "num_input_tokens_seen": 179795332, + "step": 1423 + }, + { + "epoch": 0.3650355928942474, + "loss": 1.1628878116607666, + "loss_ce": 0.0022432878613471985, + "loss_iou": 0.53515625, + "loss_num": 0.0181884765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 179795332, + "step": 1423 + }, + { + "epoch": 0.3652921182581928, + "grad_norm": 50.66184616088867, + "learning_rate": 5e-06, + "loss": 1.2674, + "num_input_tokens_seen": 179921924, + "step": 1424 + }, + { + "epoch": 0.3652921182581928, + "loss": 1.226226568222046, + "loss_ce": 0.0011289040558040142, + "loss_iou": 0.5546875, + "loss_num": 0.0228271484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 179921924, + "step": 1424 + }, + { + "epoch": 0.3655486436221381, + "grad_norm": 48.41244888305664, + "learning_rate": 5e-06, + "loss": 1.0742, + "num_input_tokens_seen": 180049676, + "step": 1425 + }, + { + "epoch": 0.3655486436221381, + "loss": 0.9825254678726196, + "loss_ce": 0.0005918809911236167, + "loss_iou": 0.4609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 180049676, + "step": 1425 + }, + { + "epoch": 0.3658051689860835, + "grad_norm": 55.12033462524414, + "learning_rate": 5e-06, + "loss": 1.1393, + "num_input_tokens_seen": 180175584, + "step": 1426 + }, + { + "epoch": 0.3658051689860835, + "loss": 1.2039861679077148, + "loss_ce": 0.0008612559176981449, + "loss_iou": 0.55078125, + "loss_num": 0.0203857421875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 180175584, + "step": 1426 + }, + { + "epoch": 0.36606169435002883, + "grad_norm": 49.57294845581055, + "learning_rate": 5e-06, + "loss": 1.1576, + "num_input_tokens_seen": 180300616, + "step": 1427 + }, + { + "epoch": 0.36606169435002883, + "loss": 1.1654398441314697, + "loss_ce": 0.004307101015001535, + "loss_iou": 0.53125, + "loss_num": 0.020263671875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 180300616, + "step": 1427 + }, + { + "epoch": 0.3663182197139742, + "grad_norm": 359.83160400390625, + "learning_rate": 5e-06, + "loss": 1.0577, + "num_input_tokens_seen": 180425928, + "step": 1428 + }, + { + "epoch": 0.3663182197139742, + "loss": 1.2139939069747925, + "loss_ce": 0.0015915792901068926, + "loss_iou": 0.546875, + "loss_num": 0.0230712890625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 180425928, + "step": 1428 + }, + { + "epoch": 0.3665747450779196, + "grad_norm": 52.846832275390625, + "learning_rate": 5e-06, + "loss": 1.2341, + "num_input_tokens_seen": 180551508, + "step": 1429 + }, + { + "epoch": 0.3665747450779196, + "loss": 1.01686692237854, + "loss_ce": 0.001241927850060165, + "loss_iou": 0.474609375, + "loss_num": 0.01312255859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 180551508, + "step": 1429 + }, + { + "epoch": 0.3668312704418649, + "grad_norm": 54.247379302978516, + "learning_rate": 5e-06, + "loss": 1.1757, + "num_input_tokens_seen": 180678760, + "step": 1430 + }, + { + "epoch": 0.3668312704418649, + "loss": 1.1665047407150269, + "loss_ce": 0.001953993458300829, + "loss_iou": 0.5234375, + "loss_num": 0.0242919921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 180678760, + "step": 1430 + }, + { + "epoch": 0.3670877958058103, + "grad_norm": 64.9625473022461, + "learning_rate": 5e-06, + "loss": 1.1795, + "num_input_tokens_seen": 180804548, + "step": 1431 + }, + { + "epoch": 0.3670877958058103, + "loss": 1.1200346946716309, + "loss_ce": 0.00040587206603959203, + "loss_iou": 0.52734375, + "loss_num": 0.01385498046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 180804548, + "step": 1431 + }, + { + "epoch": 0.3673443211697557, + "grad_norm": 60.98923110961914, + "learning_rate": 5e-06, + "loss": 1.2153, + "num_input_tokens_seen": 180929400, + "step": 1432 + }, + { + "epoch": 0.3673443211697557, + "loss": 1.3304595947265625, + "loss_ce": 0.0052642421796917915, + "loss_iou": 0.59375, + "loss_num": 0.0281982421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 180929400, + "step": 1432 + }, + { + "epoch": 0.367600846533701, + "grad_norm": 92.65335083007812, + "learning_rate": 5e-06, + "loss": 1.0999, + "num_input_tokens_seen": 181054752, + "step": 1433 + }, + { + "epoch": 0.367600846533701, + "loss": 1.1723136901855469, + "loss_ce": 0.0009269764414057136, + "loss_iou": 0.53515625, + "loss_num": 0.0208740234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 181054752, + "step": 1433 + }, + { + "epoch": 0.3678573718976464, + "grad_norm": 44.80340576171875, + "learning_rate": 5e-06, + "loss": 1.111, + "num_input_tokens_seen": 181181180, + "step": 1434 + }, + { + "epoch": 0.3678573718976464, + "loss": 0.998780369758606, + "loss_ce": 0.0007334743859246373, + "loss_iou": 0.44921875, + "loss_num": 0.02001953125, + "loss_xval": 1.0, + "num_input_tokens_seen": 181181180, + "step": 1434 + }, + { + "epoch": 0.3681138972615917, + "grad_norm": 45.4466552734375, + "learning_rate": 5e-06, + "loss": 1.1788, + "num_input_tokens_seen": 181306016, + "step": 1435 + }, + { + "epoch": 0.3681138972615917, + "loss": 1.195596694946289, + "loss_ce": 0.0022373043466359377, + "loss_iou": 0.5546875, + "loss_num": 0.0164794921875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 181306016, + "step": 1435 + }, + { + "epoch": 0.3683704226255371, + "grad_norm": 58.181339263916016, + "learning_rate": 5e-06, + "loss": 1.086, + "num_input_tokens_seen": 181433156, + "step": 1436 + }, + { + "epoch": 0.3683704226255371, + "loss": 1.0234589576721191, + "loss_ce": 0.0009980329778045416, + "loss_iou": 0.47265625, + "loss_num": 0.01519775390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 181433156, + "step": 1436 + }, + { + "epoch": 0.3686269479894825, + "grad_norm": 60.11557388305664, + "learning_rate": 5e-06, + "loss": 1.4318, + "num_input_tokens_seen": 181559304, + "step": 1437 + }, + { + "epoch": 0.3686269479894825, + "loss": 1.6108883619308472, + "loss_ce": 0.004443113226443529, + "loss_iou": 0.70703125, + "loss_num": 0.038330078125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 181559304, + "step": 1437 + }, + { + "epoch": 0.3688834733534278, + "grad_norm": 63.923423767089844, + "learning_rate": 5e-06, + "loss": 1.1966, + "num_input_tokens_seen": 181686156, + "step": 1438 + }, + { + "epoch": 0.3688834733534278, + "loss": 1.4142377376556396, + "loss_ce": 0.003104914678260684, + "loss_iou": 0.62109375, + "loss_num": 0.0341796875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 181686156, + "step": 1438 + }, + { + "epoch": 0.3691399987173732, + "grad_norm": 58.34791946411133, + "learning_rate": 5e-06, + "loss": 1.224, + "num_input_tokens_seen": 181812904, + "step": 1439 + }, + { + "epoch": 0.3691399987173732, + "loss": 1.2716073989868164, + "loss_ce": 0.0006112903356552124, + "loss_iou": 0.578125, + "loss_num": 0.0230712890625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 181812904, + "step": 1439 + }, + { + "epoch": 0.36939652408131857, + "grad_norm": 71.49700927734375, + "learning_rate": 5e-06, + "loss": 1.1284, + "num_input_tokens_seen": 181939124, + "step": 1440 + }, + { + "epoch": 0.36939652408131857, + "loss": 1.3389750719070435, + "loss_ce": 0.0020609612111002207, + "loss_iou": 0.6015625, + "loss_num": 0.027099609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 181939124, + "step": 1440 + }, + { + "epoch": 0.3696530494452639, + "grad_norm": 54.36580276489258, + "learning_rate": 5e-06, + "loss": 1.2617, + "num_input_tokens_seen": 182064644, + "step": 1441 + }, + { + "epoch": 0.3696530494452639, + "loss": 1.2562098503112793, + "loss_ce": 0.0037685034330934286, + "loss_iou": 0.59375, + "loss_num": 0.013427734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 182064644, + "step": 1441 + }, + { + "epoch": 0.3699095748092093, + "grad_norm": 43.05189895629883, + "learning_rate": 5e-06, + "loss": 1.1161, + "num_input_tokens_seen": 182190700, + "step": 1442 + }, + { + "epoch": 0.3699095748092093, + "loss": 0.9707801938056946, + "loss_ce": 0.0005654080305248499, + "loss_iou": 0.45703125, + "loss_num": 0.01116943359375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 182190700, + "step": 1442 + }, + { + "epoch": 0.3701661001731546, + "grad_norm": 58.621910095214844, + "learning_rate": 5e-06, + "loss": 1.116, + "num_input_tokens_seen": 182317404, + "step": 1443 + }, + { + "epoch": 0.3701661001731546, + "loss": 1.0504403114318848, + "loss_ce": 0.0011238225270062685, + "loss_iou": 0.494140625, + "loss_num": 0.01214599609375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 182317404, + "step": 1443 + }, + { + "epoch": 0.3704226255371, + "grad_norm": 52.356361389160156, + "learning_rate": 5e-06, + "loss": 1.1329, + "num_input_tokens_seen": 182444136, + "step": 1444 + }, + { + "epoch": 0.3704226255371, + "loss": 1.24242103099823, + "loss_ce": 0.002186728175729513, + "loss_iou": 0.55078125, + "loss_num": 0.0283203125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 182444136, + "step": 1444 + }, + { + "epoch": 0.37067915090104536, + "grad_norm": 77.2430648803711, + "learning_rate": 5e-06, + "loss": 1.0967, + "num_input_tokens_seen": 182570484, + "step": 1445 + }, + { + "epoch": 0.37067915090104536, + "loss": 1.0517778396606445, + "loss_ce": 0.0014848210848867893, + "loss_iou": 0.484375, + "loss_num": 0.016357421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 182570484, + "step": 1445 + }, + { + "epoch": 0.3709356762649907, + "grad_norm": 51.65608596801758, + "learning_rate": 5e-06, + "loss": 1.3162, + "num_input_tokens_seen": 182698136, + "step": 1446 + }, + { + "epoch": 0.3709356762649907, + "loss": 1.4817254543304443, + "loss_ce": 0.002233359031379223, + "loss_iou": 0.67578125, + "loss_num": 0.0262451171875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 182698136, + "step": 1446 + }, + { + "epoch": 0.37119220162893607, + "grad_norm": 30.956701278686523, + "learning_rate": 5e-06, + "loss": 1.1749, + "num_input_tokens_seen": 182824724, + "step": 1447 + }, + { + "epoch": 0.37119220162893607, + "loss": 1.1401504278182983, + "loss_ce": 0.0005019723903387785, + "loss_iou": 0.5234375, + "loss_num": 0.019287109375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 182824724, + "step": 1447 + }, + { + "epoch": 0.3714487269928814, + "grad_norm": 26.532615661621094, + "learning_rate": 5e-06, + "loss": 1.2666, + "num_input_tokens_seen": 182950348, + "step": 1448 + }, + { + "epoch": 0.3714487269928814, + "loss": 1.6688858270645142, + "loss_ce": 0.000917116878554225, + "loss_iou": 0.7109375, + "loss_num": 0.0498046875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 182950348, + "step": 1448 + }, + { + "epoch": 0.3717052523568268, + "grad_norm": 30.09617805480957, + "learning_rate": 5e-06, + "loss": 1.0391, + "num_input_tokens_seen": 183076340, + "step": 1449 + }, + { + "epoch": 0.3717052523568268, + "loss": 1.0249981880187988, + "loss_ce": 0.002537301741540432, + "loss_iou": 0.47265625, + "loss_num": 0.014892578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 183076340, + "step": 1449 + }, + { + "epoch": 0.37196177772077216, + "grad_norm": 48.92543029785156, + "learning_rate": 5e-06, + "loss": 1.1529, + "num_input_tokens_seen": 183204332, + "step": 1450 + }, + { + "epoch": 0.37196177772077216, + "loss": 1.0905632972717285, + "loss_ce": 0.0007195285870693624, + "loss_iou": 0.5, + "loss_num": 0.017333984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 183204332, + "step": 1450 + }, + { + "epoch": 0.3722183030847175, + "grad_norm": 81.83463287353516, + "learning_rate": 5e-06, + "loss": 1.2816, + "num_input_tokens_seen": 183331368, + "step": 1451 + }, + { + "epoch": 0.3722183030847175, + "loss": 1.1401288509368896, + "loss_ce": 0.00048049382166936994, + "loss_iou": 0.54296875, + "loss_num": 0.010498046875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 183331368, + "step": 1451 + }, + { + "epoch": 0.37247482844866286, + "grad_norm": 46.24848175048828, + "learning_rate": 5e-06, + "loss": 1.2737, + "num_input_tokens_seen": 183458728, + "step": 1452 + }, + { + "epoch": 0.37247482844866286, + "loss": 1.4954209327697754, + "loss_ce": 0.0017685178900137544, + "loss_iou": 0.67578125, + "loss_num": 0.02783203125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 183458728, + "step": 1452 + }, + { + "epoch": 0.37273135381260825, + "grad_norm": 53.595603942871094, + "learning_rate": 5e-06, + "loss": 1.096, + "num_input_tokens_seen": 183585176, + "step": 1453 + }, + { + "epoch": 0.37273135381260825, + "loss": 1.109360694885254, + "loss_ce": 0.0009622994693927467, + "loss_iou": 0.51171875, + "loss_num": 0.017333984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 183585176, + "step": 1453 + }, + { + "epoch": 0.37298787917655357, + "grad_norm": 52.50171661376953, + "learning_rate": 5e-06, + "loss": 1.2083, + "num_input_tokens_seen": 183712196, + "step": 1454 + }, + { + "epoch": 0.37298787917655357, + "loss": 1.2069188356399536, + "loss_ce": 0.0008641676395200193, + "loss_iou": 0.546875, + "loss_num": 0.021728515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 183712196, + "step": 1454 + }, + { + "epoch": 0.37324440454049895, + "grad_norm": 47.28135681152344, + "learning_rate": 5e-06, + "loss": 1.1427, + "num_input_tokens_seen": 183837988, + "step": 1455 + }, + { + "epoch": 0.37324440454049895, + "loss": 0.9374693036079407, + "loss_ce": 0.0019224147545173764, + "loss_iou": 0.423828125, + "loss_num": 0.017822265625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 183837988, + "step": 1455 + }, + { + "epoch": 0.3735009299044443, + "grad_norm": 51.378475189208984, + "learning_rate": 5e-06, + "loss": 1.1388, + "num_input_tokens_seen": 183964708, + "step": 1456 + }, + { + "epoch": 0.3735009299044443, + "loss": 1.0875346660614014, + "loss_ce": 0.0020854545291513205, + "loss_iou": 0.515625, + "loss_num": 0.0113525390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 183964708, + "step": 1456 + }, + { + "epoch": 0.37375745526838966, + "grad_norm": 48.922672271728516, + "learning_rate": 5e-06, + "loss": 1.2063, + "num_input_tokens_seen": 184091092, + "step": 1457 + }, + { + "epoch": 0.37375745526838966, + "loss": 1.1303311586380005, + "loss_ce": 0.000936680706217885, + "loss_iou": 0.51171875, + "loss_num": 0.0206298828125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 184091092, + "step": 1457 + }, + { + "epoch": 0.37401398063233504, + "grad_norm": 35.330074310302734, + "learning_rate": 5e-06, + "loss": 1.046, + "num_input_tokens_seen": 184217968, + "step": 1458 + }, + { + "epoch": 0.37401398063233504, + "loss": 1.1571398973464966, + "loss_ce": 0.0008899496169760823, + "loss_iou": 0.52734375, + "loss_num": 0.0198974609375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 184217968, + "step": 1458 + }, + { + "epoch": 0.37427050599628037, + "grad_norm": 46.58449935913086, + "learning_rate": 5e-06, + "loss": 1.1735, + "num_input_tokens_seen": 184345016, + "step": 1459 + }, + { + "epoch": 0.37427050599628037, + "loss": 1.1300907135009766, + "loss_ce": 0.00020788460096810013, + "loss_iou": 0.5234375, + "loss_num": 0.01708984375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 184345016, + "step": 1459 + }, + { + "epoch": 0.37452703136022575, + "grad_norm": 72.1821060180664, + "learning_rate": 5e-06, + "loss": 1.3589, + "num_input_tokens_seen": 184471676, + "step": 1460 + }, + { + "epoch": 0.37452703136022575, + "loss": 1.181196689605713, + "loss_ce": 0.0015092871617525816, + "loss_iou": 0.546875, + "loss_num": 0.0174560546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 184471676, + "step": 1460 + }, + { + "epoch": 0.37478355672417113, + "grad_norm": 50.933868408203125, + "learning_rate": 5e-06, + "loss": 1.2062, + "num_input_tokens_seen": 184598868, + "step": 1461 + }, + { + "epoch": 0.37478355672417113, + "loss": 1.2450307607650757, + "loss_ce": 0.0018667103722691536, + "loss_iou": 0.58984375, + "loss_num": 0.01324462890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 184598868, + "step": 1461 + }, + { + "epoch": 0.37504008208811646, + "grad_norm": 31.51675033569336, + "learning_rate": 5e-06, + "loss": 1.1351, + "num_input_tokens_seen": 184725156, + "step": 1462 + }, + { + "epoch": 0.37504008208811646, + "loss": 1.248775839805603, + "loss_ce": 0.00219384185038507, + "loss_iou": 0.5703125, + "loss_num": 0.0206298828125, + "loss_xval": 1.25, + "num_input_tokens_seen": 184725156, + "step": 1462 + }, + { + "epoch": 0.37529660745206184, + "grad_norm": 50.46366882324219, + "learning_rate": 5e-06, + "loss": 1.1203, + "num_input_tokens_seen": 184851476, + "step": 1463 + }, + { + "epoch": 0.37529660745206184, + "loss": 1.2937703132629395, + "loss_ce": 0.0012898120330646634, + "loss_iou": 0.59375, + "loss_num": 0.0218505859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 184851476, + "step": 1463 + }, + { + "epoch": 0.37555313281600716, + "grad_norm": 60.26002502441406, + "learning_rate": 5e-06, + "loss": 1.1225, + "num_input_tokens_seen": 184977272, + "step": 1464 + }, + { + "epoch": 0.37555313281600716, + "loss": 1.1190664768218994, + "loss_ce": 0.0013906274689361453, + "loss_iou": 0.515625, + "loss_num": 0.0169677734375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 184977272, + "step": 1464 + }, + { + "epoch": 0.37580965817995254, + "grad_norm": 73.34342193603516, + "learning_rate": 5e-06, + "loss": 1.0823, + "num_input_tokens_seen": 185104268, + "step": 1465 + }, + { + "epoch": 0.37580965817995254, + "loss": 1.18039071559906, + "loss_ce": 0.0021680148784071207, + "loss_iou": 0.53515625, + "loss_num": 0.0218505859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 185104268, + "step": 1465 + }, + { + "epoch": 0.3760661835438979, + "grad_norm": 53.87993240356445, + "learning_rate": 5e-06, + "loss": 1.1936, + "num_input_tokens_seen": 185229720, + "step": 1466 + }, + { + "epoch": 0.3760661835438979, + "loss": 1.347795009613037, + "loss_ce": 0.0006270152516663074, + "loss_iou": 0.6171875, + "loss_num": 0.0228271484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 185229720, + "step": 1466 + }, + { + "epoch": 0.37632270890784325, + "grad_norm": 35.32950973510742, + "learning_rate": 5e-06, + "loss": 1.1457, + "num_input_tokens_seen": 185357412, + "step": 1467 + }, + { + "epoch": 0.37632270890784325, + "loss": 0.9886486530303955, + "loss_ce": 0.0003673986648209393, + "loss_iou": 0.466796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 185357412, + "step": 1467 + }, + { + "epoch": 0.37657923427178863, + "grad_norm": 41.81003952026367, + "learning_rate": 5e-06, + "loss": 1.1306, + "num_input_tokens_seen": 185483416, + "step": 1468 + }, + { + "epoch": 0.37657923427178863, + "loss": 0.9659301042556763, + "loss_ce": 0.0010863338829949498, + "loss_iou": 0.44140625, + "loss_num": 0.01611328125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 185483416, + "step": 1468 + }, + { + "epoch": 0.37683575963573396, + "grad_norm": 59.57676696777344, + "learning_rate": 5e-06, + "loss": 1.1317, + "num_input_tokens_seen": 185609440, + "step": 1469 + }, + { + "epoch": 0.37683575963573396, + "loss": 0.9435204267501831, + "loss_ce": 0.0006493524415418506, + "loss_iou": 0.435546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 185609440, + "step": 1469 + }, + { + "epoch": 0.37709228499967934, + "grad_norm": 58.61503982543945, + "learning_rate": 5e-06, + "loss": 1.1535, + "num_input_tokens_seen": 185735728, + "step": 1470 + }, + { + "epoch": 0.37709228499967934, + "loss": 1.3372206687927246, + "loss_ce": 0.008607424795627594, + "loss_iou": 0.578125, + "loss_num": 0.033935546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 185735728, + "step": 1470 + }, + { + "epoch": 0.3773488103636247, + "grad_norm": 69.69792175292969, + "learning_rate": 5e-06, + "loss": 1.1821, + "num_input_tokens_seen": 185862328, + "step": 1471 + }, + { + "epoch": 0.3773488103636247, + "loss": 1.2412822246551514, + "loss_ce": 0.000559644540771842, + "loss_iou": 0.5859375, + "loss_num": 0.01422119140625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 185862328, + "step": 1471 + }, + { + "epoch": 0.37760533572757005, + "grad_norm": 57.6271858215332, + "learning_rate": 5e-06, + "loss": 1.084, + "num_input_tokens_seen": 185989264, + "step": 1472 + }, + { + "epoch": 0.37760533572757005, + "loss": 1.087946891784668, + "loss_ce": 0.0005444984999485314, + "loss_iou": 0.5078125, + "loss_num": 0.01397705078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 185989264, + "step": 1472 + }, + { + "epoch": 0.3778618610915154, + "grad_norm": 42.4777946472168, + "learning_rate": 5e-06, + "loss": 1.1132, + "num_input_tokens_seen": 186116152, + "step": 1473 + }, + { + "epoch": 0.3778618610915154, + "loss": 1.2301592826843262, + "loss_ce": 0.0033527114428579807, + "loss_iou": 0.5625, + "loss_num": 0.0201416015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 186116152, + "step": 1473 + }, + { + "epoch": 0.3781183864554608, + "grad_norm": 58.11088943481445, + "learning_rate": 5e-06, + "loss": 1.1675, + "num_input_tokens_seen": 186242344, + "step": 1474 + }, + { + "epoch": 0.3781183864554608, + "loss": 1.3402596712112427, + "loss_ce": 0.0009042044403031468, + "loss_iou": 0.6015625, + "loss_num": 0.0269775390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 186242344, + "step": 1474 + }, + { + "epoch": 0.37837491181940613, + "grad_norm": 92.21174621582031, + "learning_rate": 5e-06, + "loss": 1.2085, + "num_input_tokens_seen": 186368552, + "step": 1475 + }, + { + "epoch": 0.37837491181940613, + "loss": 1.2746590375900269, + "loss_ce": 0.002198141533881426, + "loss_iou": 0.58984375, + "loss_num": 0.017822265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 186368552, + "step": 1475 + }, + { + "epoch": 0.3786314371833515, + "grad_norm": 48.54583740234375, + "learning_rate": 5e-06, + "loss": 1.223, + "num_input_tokens_seen": 186494284, + "step": 1476 + }, + { + "epoch": 0.3786314371833515, + "loss": 1.3988313674926758, + "loss_ce": 0.006741541437804699, + "loss_iou": 0.62890625, + "loss_num": 0.0264892578125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 186494284, + "step": 1476 + }, + { + "epoch": 0.37888796254729684, + "grad_norm": 38.96225357055664, + "learning_rate": 5e-06, + "loss": 1.0842, + "num_input_tokens_seen": 186620304, + "step": 1477 + }, + { + "epoch": 0.37888796254729684, + "loss": 1.0690982341766357, + "loss_ce": 0.002691975561901927, + "loss_iou": 0.48828125, + "loss_num": 0.01806640625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 186620304, + "step": 1477 + }, + { + "epoch": 0.3791444879112422, + "grad_norm": 53.8547248840332, + "learning_rate": 5e-06, + "loss": 0.9833, + "num_input_tokens_seen": 186747888, + "step": 1478 + }, + { + "epoch": 0.3791444879112422, + "loss": 0.8739967346191406, + "loss_ce": 0.002902979264035821, + "loss_iou": 0.408203125, + "loss_num": 0.010986328125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 186747888, + "step": 1478 + }, + { + "epoch": 0.3794010132751876, + "grad_norm": 51.58454132080078, + "learning_rate": 5e-06, + "loss": 1.0796, + "num_input_tokens_seen": 186874780, + "step": 1479 + }, + { + "epoch": 0.3794010132751876, + "loss": 1.0766490697860718, + "loss_ce": 0.002430299762636423, + "loss_iou": 0.478515625, + "loss_num": 0.023193359375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 186874780, + "step": 1479 + }, + { + "epoch": 0.37965753863913293, + "grad_norm": 62.28803253173828, + "learning_rate": 5e-06, + "loss": 1.0719, + "num_input_tokens_seen": 187002092, + "step": 1480 + }, + { + "epoch": 0.37965753863913293, + "loss": 1.0872108936309814, + "loss_ce": 0.00029688942595385015, + "loss_iou": 0.5, + "loss_num": 0.0169677734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 187002092, + "step": 1480 + }, + { + "epoch": 0.3799140640030783, + "grad_norm": 47.01852798461914, + "learning_rate": 5e-06, + "loss": 1.1707, + "num_input_tokens_seen": 187128668, + "step": 1481 + }, + { + "epoch": 0.3799140640030783, + "loss": 1.3619575500488281, + "loss_ce": 0.0016059414483606815, + "loss_iou": 0.61328125, + "loss_num": 0.0274658203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 187128668, + "step": 1481 + }, + { + "epoch": 0.38017058936702364, + "grad_norm": 29.3968505859375, + "learning_rate": 5e-06, + "loss": 1.1119, + "num_input_tokens_seen": 187254764, + "step": 1482 + }, + { + "epoch": 0.38017058936702364, + "loss": 1.0216299295425415, + "loss_ce": 0.0030751931481063366, + "loss_iou": 0.48828125, + "loss_num": 0.008544921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 187254764, + "step": 1482 + }, + { + "epoch": 0.380427114730969, + "grad_norm": 50.26612091064453, + "learning_rate": 5e-06, + "loss": 1.101, + "num_input_tokens_seen": 187380452, + "step": 1483 + }, + { + "epoch": 0.380427114730969, + "loss": 0.9717945456504822, + "loss_ce": 0.0015796525403857231, + "loss_iou": 0.46484375, + "loss_num": 0.007568359375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 187380452, + "step": 1483 + }, + { + "epoch": 0.3806836400949144, + "grad_norm": 109.7345199584961, + "learning_rate": 5e-06, + "loss": 1.2347, + "num_input_tokens_seen": 187507644, + "step": 1484 + }, + { + "epoch": 0.3806836400949144, + "loss": 1.1330649852752686, + "loss_ce": 0.00025251254555769265, + "loss_iou": 0.5390625, + "loss_num": 0.0106201171875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 187507644, + "step": 1484 + }, + { + "epoch": 0.3809401654588597, + "grad_norm": 71.36920928955078, + "learning_rate": 5e-06, + "loss": 1.1469, + "num_input_tokens_seen": 187635876, + "step": 1485 + }, + { + "epoch": 0.3809401654588597, + "loss": 1.1659294366836548, + "loss_ce": 0.002843515481799841, + "loss_iou": 0.5390625, + "loss_num": 0.0169677734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 187635876, + "step": 1485 + }, + { + "epoch": 0.3811966908228051, + "grad_norm": 51.950374603271484, + "learning_rate": 5e-06, + "loss": 1.2401, + "num_input_tokens_seen": 187762348, + "step": 1486 + }, + { + "epoch": 0.3811966908228051, + "loss": 1.083819031715393, + "loss_ce": 0.0008112285286188126, + "loss_iou": 0.5078125, + "loss_num": 0.0130615234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 187762348, + "step": 1486 + }, + { + "epoch": 0.3814532161867505, + "grad_norm": 26.825910568237305, + "learning_rate": 5e-06, + "loss": 1.1679, + "num_input_tokens_seen": 187889512, + "step": 1487 + }, + { + "epoch": 0.3814532161867505, + "loss": 1.0824724435806274, + "loss_ce": 0.0026385136879980564, + "loss_iou": 0.48828125, + "loss_num": 0.0205078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 187889512, + "step": 1487 + }, + { + "epoch": 0.3817097415506958, + "grad_norm": 35.31439971923828, + "learning_rate": 5e-06, + "loss": 1.1302, + "num_input_tokens_seen": 188015532, + "step": 1488 + }, + { + "epoch": 0.3817097415506958, + "loss": 1.1531200408935547, + "loss_ce": 0.0007762362947687507, + "loss_iou": 0.51953125, + "loss_num": 0.0220947265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 188015532, + "step": 1488 + }, + { + "epoch": 0.3819662669146412, + "grad_norm": 48.783267974853516, + "learning_rate": 5e-06, + "loss": 0.9855, + "num_input_tokens_seen": 188142588, + "step": 1489 + }, + { + "epoch": 0.3819662669146412, + "loss": 1.179478645324707, + "loss_ce": 0.00125592271797359, + "loss_iou": 0.53515625, + "loss_num": 0.021484375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 188142588, + "step": 1489 + }, + { + "epoch": 0.3822227922785865, + "grad_norm": 59.11516189575195, + "learning_rate": 5e-06, + "loss": 1.1455, + "num_input_tokens_seen": 188268616, + "step": 1490 + }, + { + "epoch": 0.3822227922785865, + "loss": 1.342651128768921, + "loss_ce": 0.00231909635476768, + "loss_iou": 0.59765625, + "loss_num": 0.02880859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 188268616, + "step": 1490 + }, + { + "epoch": 0.3824793176425319, + "grad_norm": 58.986289978027344, + "learning_rate": 5e-06, + "loss": 1.1642, + "num_input_tokens_seen": 188395472, + "step": 1491 + }, + { + "epoch": 0.3824793176425319, + "loss": 1.4730690717697144, + "loss_ce": 0.0013893836876377463, + "loss_iou": 0.671875, + "loss_num": 0.0262451171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 188395472, + "step": 1491 + }, + { + "epoch": 0.3827358430064773, + "grad_norm": 63.35247802734375, + "learning_rate": 5e-06, + "loss": 1.0822, + "num_input_tokens_seen": 188521252, + "step": 1492 + }, + { + "epoch": 0.3827358430064773, + "loss": 1.1525726318359375, + "loss_ce": 0.0007171613397076726, + "loss_iou": 0.5234375, + "loss_num": 0.0205078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 188521252, + "step": 1492 + }, + { + "epoch": 0.3829923683704226, + "grad_norm": 50.06898880004883, + "learning_rate": 5e-06, + "loss": 1.069, + "num_input_tokens_seen": 188647320, + "step": 1493 + }, + { + "epoch": 0.3829923683704226, + "loss": 0.908941924571991, + "loss_ce": 0.0007387791993096471, + "loss_iou": 0.435546875, + "loss_num": 0.00762939453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 188647320, + "step": 1493 + }, + { + "epoch": 0.383248893734368, + "grad_norm": 45.221744537353516, + "learning_rate": 5e-06, + "loss": 1.1514, + "num_input_tokens_seen": 188772732, + "step": 1494 + }, + { + "epoch": 0.383248893734368, + "loss": 1.234305739402771, + "loss_ce": 0.004325295332819223, + "loss_iou": 0.5390625, + "loss_num": 0.0302734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 188772732, + "step": 1494 + }, + { + "epoch": 0.38350541909831337, + "grad_norm": 53.2840461730957, + "learning_rate": 5e-06, + "loss": 1.0886, + "num_input_tokens_seen": 188898676, + "step": 1495 + }, + { + "epoch": 0.38350541909831337, + "loss": 1.0565247535705566, + "loss_ce": 0.0033020772971212864, + "loss_iou": 0.50390625, + "loss_num": 0.0096435546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 188898676, + "step": 1495 + }, + { + "epoch": 0.3837619444622587, + "grad_norm": 78.18954467773438, + "learning_rate": 5e-06, + "loss": 1.181, + "num_input_tokens_seen": 189024624, + "step": 1496 + }, + { + "epoch": 0.3837619444622587, + "loss": 1.1217641830444336, + "loss_ce": 0.0006704367697238922, + "loss_iou": 0.51171875, + "loss_num": 0.0191650390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 189024624, + "step": 1496 + }, + { + "epoch": 0.3840184698262041, + "grad_norm": 51.146820068359375, + "learning_rate": 5e-06, + "loss": 1.2754, + "num_input_tokens_seen": 189151212, + "step": 1497 + }, + { + "epoch": 0.3840184698262041, + "loss": 1.4092732667922974, + "loss_ce": 0.004488097969442606, + "loss_iou": 0.62890625, + "loss_num": 0.0301513671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 189151212, + "step": 1497 + }, + { + "epoch": 0.3842749951901494, + "grad_norm": 39.66246795654297, + "learning_rate": 5e-06, + "loss": 0.9884, + "num_input_tokens_seen": 189277892, + "step": 1498 + }, + { + "epoch": 0.3842749951901494, + "loss": 0.8523750305175781, + "loss_ce": 0.00032432383159175515, + "loss_iou": 0.408203125, + "loss_num": 0.0074462890625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 189277892, + "step": 1498 + }, + { + "epoch": 0.3845315205540948, + "grad_norm": 56.27462387084961, + "learning_rate": 5e-06, + "loss": 1.0781, + "num_input_tokens_seen": 189404836, + "step": 1499 + }, + { + "epoch": 0.3845315205540948, + "loss": 0.9103527069091797, + "loss_ce": 0.0006847254699096084, + "loss_iou": 0.4296875, + "loss_num": 0.01025390625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 189404836, + "step": 1499 + }, + { + "epoch": 0.38478804591804017, + "grad_norm": 76.12755584716797, + "learning_rate": 5e-06, + "loss": 1.0705, + "num_input_tokens_seen": 189530596, + "step": 1500 + }, + { + "epoch": 0.38478804591804017, + "eval_icons_CIoU": 0.11265752464532852, + "eval_icons_GIoU": 0.08986295387148857, + "eval_icons_IoU": 0.2963978126645088, + "eval_icons_MAE_all": 0.035035944543778896, + "eval_icons_MAE_h": 0.06094491295516491, + "eval_icons_MAE_w": 0.05346305854618549, + "eval_icons_MAE_x_boxes": 0.0489485040307045, + "eval_icons_MAE_y_boxes": 0.05883572995662689, + "eval_icons_NUM_probability": 0.9997245073318481, + "eval_icons_inside_bbox": 0.5225694477558136, + "eval_icons_loss": 1.9418941736221313, + "eval_icons_loss_ce": 0.0002810888981912285, + "eval_icons_loss_iou": 0.8802490234375, + "eval_icons_loss_num": 0.03933525085449219, + "eval_icons_loss_xval": 1.957763671875, + "eval_icons_runtime": 63.2423, + "eval_icons_samples_per_second": 0.791, + "eval_icons_steps_per_second": 0.032, + "num_input_tokens_seen": 189530596, + "step": 1500 + }, + { + "epoch": 0.38478804591804017, + "eval_screenspot_CIoU": 0.11906857788562775, + "eval_screenspot_GIoU": 0.10873545954624812, + "eval_screenspot_IoU": 0.2889045178890228, + "eval_screenspot_MAE_all": 0.0802874465783437, + "eval_screenspot_MAE_h": 0.058682166039943695, + "eval_screenspot_MAE_w": 0.128630168735981, + "eval_screenspot_MAE_x_boxes": 0.10889026025931041, + "eval_screenspot_MAE_y_boxes": 0.05938880269726118, + "eval_screenspot_NUM_probability": 0.9998126228650411, + "eval_screenspot_inside_bbox": 0.6016666690508524, + "eval_screenspot_loss": 2.2293777465820312, + "eval_screenspot_loss_ce": 0.0027873129583895206, + "eval_screenspot_loss_iou": 0.9191080729166666, + "eval_screenspot_loss_num": 0.08681233723958333, + "eval_screenspot_loss_xval": 2.2721354166666665, + "eval_screenspot_runtime": 109.1977, + "eval_screenspot_samples_per_second": 0.815, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 189530596, + "step": 1500 + }, + { + "epoch": 0.38478804591804017, + "loss": 2.201124668121338, + "loss_ce": 0.0019058401230722666, + "loss_iou": 0.8984375, + "loss_num": 0.080078125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 189530596, + "step": 1500 + }, + { + "epoch": 0.3850445712819855, + "grad_norm": 61.09836959838867, + "learning_rate": 5e-06, + "loss": 1.1602, + "num_input_tokens_seen": 189658140, + "step": 1501 + }, + { + "epoch": 0.3850445712819855, + "loss": 1.015412449836731, + "loss_ce": 0.0002757255861070007, + "loss_iou": 0.478515625, + "loss_num": 0.01177978515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 189658140, + "step": 1501 + }, + { + "epoch": 0.3853010966459309, + "grad_norm": 52.31174850463867, + "learning_rate": 5e-06, + "loss": 1.0971, + "num_input_tokens_seen": 189785788, + "step": 1502 + }, + { + "epoch": 0.3853010966459309, + "loss": 1.1657353639602661, + "loss_ce": 0.002161166165024042, + "loss_iou": 0.5390625, + "loss_num": 0.0166015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 189785788, + "step": 1502 + }, + { + "epoch": 0.3855576220098762, + "grad_norm": 50.60268783569336, + "learning_rate": 5e-06, + "loss": 1.2291, + "num_input_tokens_seen": 189911632, + "step": 1503 + }, + { + "epoch": 0.3855576220098762, + "loss": 1.2179605960845947, + "loss_ce": 0.001651977770961821, + "loss_iou": 0.5703125, + "loss_num": 0.014892578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 189911632, + "step": 1503 + }, + { + "epoch": 0.3858141473738216, + "grad_norm": 48.19023895263672, + "learning_rate": 5e-06, + "loss": 1.0979, + "num_input_tokens_seen": 190036728, + "step": 1504 + }, + { + "epoch": 0.3858141473738216, + "loss": 0.8764955401420593, + "loss_ce": 0.0010072184959426522, + "loss_iou": 0.416015625, + "loss_num": 0.0087890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 190036728, + "step": 1504 + }, + { + "epoch": 0.38607067273776696, + "grad_norm": 63.0456428527832, + "learning_rate": 5e-06, + "loss": 1.1139, + "num_input_tokens_seen": 190163360, + "step": 1505 + }, + { + "epoch": 0.38607067273776696, + "loss": 0.9802591800689697, + "loss_ce": 0.00027872496866621077, + "loss_iou": 0.4609375, + "loss_num": 0.011474609375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 190163360, + "step": 1505 + }, + { + "epoch": 0.3863271981017123, + "grad_norm": 76.94174194335938, + "learning_rate": 5e-06, + "loss": 1.0723, + "num_input_tokens_seen": 190289624, + "step": 1506 + }, + { + "epoch": 0.3863271981017123, + "loss": 1.128048300743103, + "loss_ce": 0.0015834597870707512, + "loss_iou": 0.51953125, + "loss_num": 0.016845703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 190289624, + "step": 1506 + }, + { + "epoch": 0.38658372346565767, + "grad_norm": 58.044837951660156, + "learning_rate": 5e-06, + "loss": 1.1987, + "num_input_tokens_seen": 190416452, + "step": 1507 + }, + { + "epoch": 0.38658372346565767, + "loss": 1.2507119178771973, + "loss_ce": 0.0007119464571587741, + "loss_iou": 0.55859375, + "loss_num": 0.02685546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 190416452, + "step": 1507 + }, + { + "epoch": 0.38684024882960305, + "grad_norm": 38.8647575378418, + "learning_rate": 5e-06, + "loss": 1.1747, + "num_input_tokens_seen": 190541876, + "step": 1508 + }, + { + "epoch": 0.38684024882960305, + "loss": 1.027365803718567, + "loss_ce": 0.002463455544784665, + "loss_iou": 0.466796875, + "loss_num": 0.018310546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 190541876, + "step": 1508 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 61.25758743286133, + "learning_rate": 5e-06, + "loss": 1.0612, + "num_input_tokens_seen": 190668100, + "step": 1509 + }, + { + "epoch": 0.3870967741935484, + "loss": 1.148411512374878, + "loss_ce": 0.003392076352611184, + "loss_iou": 0.52734375, + "loss_num": 0.0179443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 190668100, + "step": 1509 + }, + { + "epoch": 0.38735329955749376, + "grad_norm": 92.65426635742188, + "learning_rate": 5e-06, + "loss": 1.1023, + "num_input_tokens_seen": 190794680, + "step": 1510 + }, + { + "epoch": 0.38735329955749376, + "loss": 1.120812177658081, + "loss_ce": 0.0016715934034436941, + "loss_iou": 0.5078125, + "loss_num": 0.0211181640625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 190794680, + "step": 1510 + }, + { + "epoch": 0.3876098249214391, + "grad_norm": 52.11134338378906, + "learning_rate": 5e-06, + "loss": 1.3463, + "num_input_tokens_seen": 190921504, + "step": 1511 + }, + { + "epoch": 0.3876098249214391, + "loss": 1.367532730102539, + "loss_ce": 0.0037630663719028234, + "loss_iou": 0.62109375, + "loss_num": 0.0245361328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 190921504, + "step": 1511 + }, + { + "epoch": 0.38786635028538446, + "grad_norm": 32.84667205810547, + "learning_rate": 5e-06, + "loss": 1.1584, + "num_input_tokens_seen": 191046412, + "step": 1512 + }, + { + "epoch": 0.38786635028538446, + "loss": 1.3258578777313232, + "loss_ce": 0.0006626513786613941, + "loss_iou": 0.609375, + "loss_num": 0.0213623046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 191046412, + "step": 1512 + }, + { + "epoch": 0.38812287564932985, + "grad_norm": 47.48963928222656, + "learning_rate": 5e-06, + "loss": 1.0965, + "num_input_tokens_seen": 191172548, + "step": 1513 + }, + { + "epoch": 0.38812287564932985, + "loss": 1.0604231357574463, + "loss_ce": 0.0008528043399564922, + "loss_iou": 0.478515625, + "loss_num": 0.0206298828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 191172548, + "step": 1513 + }, + { + "epoch": 0.38837940101327517, + "grad_norm": 51.24031066894531, + "learning_rate": 5e-06, + "loss": 1.1738, + "num_input_tokens_seen": 191297372, + "step": 1514 + }, + { + "epoch": 0.38837940101327517, + "loss": 1.123929500579834, + "loss_ce": 0.0008825291297398508, + "loss_iou": 0.51171875, + "loss_num": 0.020263671875, + "loss_xval": 1.125, + "num_input_tokens_seen": 191297372, + "step": 1514 + }, + { + "epoch": 0.38863592637722055, + "grad_norm": 77.43284606933594, + "learning_rate": 5e-06, + "loss": 1.1405, + "num_input_tokens_seen": 191424232, + "step": 1515 + }, + { + "epoch": 0.38863592637722055, + "loss": 1.0486277341842651, + "loss_ce": 0.0012644442031159997, + "loss_iou": 0.490234375, + "loss_num": 0.01336669921875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 191424232, + "step": 1515 + }, + { + "epoch": 0.38889245174116593, + "grad_norm": 43.15481948852539, + "learning_rate": 5e-06, + "loss": 1.2764, + "num_input_tokens_seen": 191550568, + "step": 1516 + }, + { + "epoch": 0.38889245174116593, + "loss": 1.3104968070983887, + "loss_ce": 0.0031238049268722534, + "loss_iou": 0.578125, + "loss_num": 0.029296875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 191550568, + "step": 1516 + }, + { + "epoch": 0.38914897710511126, + "grad_norm": 36.84035873413086, + "learning_rate": 5e-06, + "loss": 1.0121, + "num_input_tokens_seen": 191677516, + "step": 1517 + }, + { + "epoch": 0.38914897710511126, + "loss": 0.9524126648902893, + "loss_ce": 0.002217363566160202, + "loss_iou": 0.443359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 191677516, + "step": 1517 + }, + { + "epoch": 0.38940550246905664, + "grad_norm": 74.81336212158203, + "learning_rate": 5e-06, + "loss": 1.0434, + "num_input_tokens_seen": 191803800, + "step": 1518 + }, + { + "epoch": 0.38940550246905664, + "loss": 1.0824499130249023, + "loss_ce": 0.0018835681257769465, + "loss_iou": 0.486328125, + "loss_num": 0.0216064453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 191803800, + "step": 1518 + }, + { + "epoch": 0.38966202783300197, + "grad_norm": 56.38863754272461, + "learning_rate": 5e-06, + "loss": 1.2743, + "num_input_tokens_seen": 191929768, + "step": 1519 + }, + { + "epoch": 0.38966202783300197, + "loss": 1.2586743831634521, + "loss_ce": 0.00037358838017098606, + "loss_iou": 0.5859375, + "loss_num": 0.017578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 191929768, + "step": 1519 + }, + { + "epoch": 0.38991855319694735, + "grad_norm": 37.75717544555664, + "learning_rate": 5e-06, + "loss": 1.1706, + "num_input_tokens_seen": 192055244, + "step": 1520 + }, + { + "epoch": 0.38991855319694735, + "loss": 1.347277045249939, + "loss_ce": 0.0025504936929792166, + "loss_iou": 0.60546875, + "loss_num": 0.026123046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 192055244, + "step": 1520 + }, + { + "epoch": 0.39017507856089273, + "grad_norm": 42.09926986694336, + "learning_rate": 5e-06, + "loss": 1.3359, + "num_input_tokens_seen": 192181076, + "step": 1521 + }, + { + "epoch": 0.39017507856089273, + "loss": 1.2312599420547485, + "loss_ce": 0.0012794963549822569, + "loss_iou": 0.55859375, + "loss_num": 0.0228271484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 192181076, + "step": 1521 + }, + { + "epoch": 0.39043160392483806, + "grad_norm": 45.43294143676758, + "learning_rate": 5e-06, + "loss": 1.0579, + "num_input_tokens_seen": 192306704, + "step": 1522 + }, + { + "epoch": 0.39043160392483806, + "loss": 1.036280870437622, + "loss_ce": 0.0006363015854731202, + "loss_iou": 0.484375, + "loss_num": 0.012939453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 192306704, + "step": 1522 + }, + { + "epoch": 0.39068812928878344, + "grad_norm": 75.3431167602539, + "learning_rate": 5e-06, + "loss": 1.2175, + "num_input_tokens_seen": 192432336, + "step": 1523 + }, + { + "epoch": 0.39068812928878344, + "loss": 1.1916499137878418, + "loss_ce": 0.002196761779487133, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 192432336, + "step": 1523 + }, + { + "epoch": 0.39094465465272876, + "grad_norm": 65.03369140625, + "learning_rate": 5e-06, + "loss": 1.3476, + "num_input_tokens_seen": 192558840, + "step": 1524 + }, + { + "epoch": 0.39094465465272876, + "loss": 1.5129880905151367, + "loss_ce": 0.005175682716071606, + "loss_iou": 0.671875, + "loss_num": 0.03271484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 192558840, + "step": 1524 + }, + { + "epoch": 0.39120118001667414, + "grad_norm": 31.224037170410156, + "learning_rate": 5e-06, + "loss": 0.9564, + "num_input_tokens_seen": 192685456, + "step": 1525 + }, + { + "epoch": 0.39120118001667414, + "loss": 1.0182437896728516, + "loss_ce": 0.0031070492696017027, + "loss_iou": 0.45703125, + "loss_num": 0.0205078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 192685456, + "step": 1525 + }, + { + "epoch": 0.3914577053806195, + "grad_norm": 65.39073181152344, + "learning_rate": 5e-06, + "loss": 1.2224, + "num_input_tokens_seen": 192811228, + "step": 1526 + }, + { + "epoch": 0.3914577053806195, + "loss": 1.2271161079406738, + "loss_ce": 0.0005535732489079237, + "loss_iou": 0.55859375, + "loss_num": 0.0223388671875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 192811228, + "step": 1526 + }, + { + "epoch": 0.39171423074456485, + "grad_norm": 61.2736701965332, + "learning_rate": 5e-06, + "loss": 1.1001, + "num_input_tokens_seen": 192938140, + "step": 1527 + }, + { + "epoch": 0.39171423074456485, + "loss": 0.9914830327033997, + "loss_ce": 0.002713508205488324, + "loss_iou": 0.45703125, + "loss_num": 0.01495361328125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 192938140, + "step": 1527 + }, + { + "epoch": 0.39197075610851023, + "grad_norm": 51.813697814941406, + "learning_rate": 5e-06, + "loss": 1.0995, + "num_input_tokens_seen": 193065036, + "step": 1528 + }, + { + "epoch": 0.39197075610851023, + "loss": 0.9853127598762512, + "loss_ce": 0.00044953409815207124, + "loss_iou": 0.458984375, + "loss_num": 0.012939453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 193065036, + "step": 1528 + }, + { + "epoch": 0.3922272814724556, + "grad_norm": 38.03799819946289, + "learning_rate": 5e-06, + "loss": 1.054, + "num_input_tokens_seen": 193190448, + "step": 1529 + }, + { + "epoch": 0.3922272814724556, + "loss": 1.145242691040039, + "loss_ce": 0.0007113935425877571, + "loss_iou": 0.52734375, + "loss_num": 0.0174560546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 193190448, + "step": 1529 + }, + { + "epoch": 0.39248380683640094, + "grad_norm": 42.415897369384766, + "learning_rate": 5e-06, + "loss": 1.0583, + "num_input_tokens_seen": 193316484, + "step": 1530 + }, + { + "epoch": 0.39248380683640094, + "loss": 1.1623746156692505, + "loss_ce": 0.0007535091135650873, + "loss_iou": 0.5234375, + "loss_num": 0.0235595703125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 193316484, + "step": 1530 + }, + { + "epoch": 0.3927403322003463, + "grad_norm": 51.46474075317383, + "learning_rate": 5e-06, + "loss": 1.1727, + "num_input_tokens_seen": 193443416, + "step": 1531 + }, + { + "epoch": 0.3927403322003463, + "loss": 1.1363688707351685, + "loss_ce": 0.002091593574732542, + "loss_iou": 0.51171875, + "loss_num": 0.0225830078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 193443416, + "step": 1531 + }, + { + "epoch": 0.39299685756429165, + "grad_norm": 70.49765014648438, + "learning_rate": 5e-06, + "loss": 1.0976, + "num_input_tokens_seen": 193569044, + "step": 1532 + }, + { + "epoch": 0.39299685756429165, + "loss": 1.1650943756103516, + "loss_ce": 0.0005436567589640617, + "loss_iou": 0.5390625, + "loss_num": 0.017333984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 193569044, + "step": 1532 + }, + { + "epoch": 0.393253382928237, + "grad_norm": 35.72270584106445, + "learning_rate": 5e-06, + "loss": 1.1378, + "num_input_tokens_seen": 193693488, + "step": 1533 + }, + { + "epoch": 0.393253382928237, + "loss": 1.165785312652588, + "loss_ce": 0.0022111451253294945, + "loss_iou": 0.53515625, + "loss_num": 0.01904296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 193693488, + "step": 1533 + }, + { + "epoch": 0.3935099082921824, + "grad_norm": 36.461997985839844, + "learning_rate": 5e-06, + "loss": 1.0489, + "num_input_tokens_seen": 193821060, + "step": 1534 + }, + { + "epoch": 0.3935099082921824, + "loss": 1.1525335311889648, + "loss_ce": 0.002142914105206728, + "loss_iou": 0.51953125, + "loss_num": 0.0228271484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 193821060, + "step": 1534 + }, + { + "epoch": 0.39376643365612773, + "grad_norm": 69.18570709228516, + "learning_rate": 5e-06, + "loss": 1.0326, + "num_input_tokens_seen": 193947016, + "step": 1535 + }, + { + "epoch": 0.39376643365612773, + "loss": 1.072740077972412, + "loss_ce": 0.0004743871686514467, + "loss_iou": 0.49609375, + "loss_num": 0.0164794921875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 193947016, + "step": 1535 + }, + { + "epoch": 0.3940229590200731, + "grad_norm": 53.863407135009766, + "learning_rate": 5e-06, + "loss": 1.2393, + "num_input_tokens_seen": 194073124, + "step": 1536 + }, + { + "epoch": 0.3940229590200731, + "loss": 1.1016266345977783, + "loss_ce": 0.001040624687448144, + "loss_iou": 0.5078125, + "loss_num": 0.0172119140625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 194073124, + "step": 1536 + }, + { + "epoch": 0.3942794843840185, + "grad_norm": 30.350656509399414, + "learning_rate": 5e-06, + "loss": 1.0471, + "num_input_tokens_seen": 194199636, + "step": 1537 + }, + { + "epoch": 0.3942794843840185, + "loss": 1.0734589099884033, + "loss_ce": 0.00021668878616765141, + "loss_iou": 0.49609375, + "loss_num": 0.016357421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 194199636, + "step": 1537 + }, + { + "epoch": 0.3945360097479638, + "grad_norm": 37.291587829589844, + "learning_rate": 5e-06, + "loss": 0.9813, + "num_input_tokens_seen": 194326176, + "step": 1538 + }, + { + "epoch": 0.3945360097479638, + "loss": 1.0740973949432373, + "loss_ce": 0.0037849380169063807, + "loss_iou": 0.49609375, + "loss_num": 0.01611328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 194326176, + "step": 1538 + }, + { + "epoch": 0.3947925351119092, + "grad_norm": 46.24781799316406, + "learning_rate": 5e-06, + "loss": 1.2387, + "num_input_tokens_seen": 194452456, + "step": 1539 + }, + { + "epoch": 0.3947925351119092, + "loss": 1.2634050846099854, + "loss_ce": 0.003639496862888336, + "loss_iou": 0.58984375, + "loss_num": 0.015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 194452456, + "step": 1539 + }, + { + "epoch": 0.39504906047585453, + "grad_norm": 65.65367889404297, + "learning_rate": 5e-06, + "loss": 1.0808, + "num_input_tokens_seen": 194578084, + "step": 1540 + }, + { + "epoch": 0.39504906047585453, + "loss": 0.951698899269104, + "loss_ce": 0.0010153321782127023, + "loss_iou": 0.4453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 194578084, + "step": 1540 + }, + { + "epoch": 0.3953055858397999, + "grad_norm": 49.97622299194336, + "learning_rate": 5e-06, + "loss": 1.322, + "num_input_tokens_seen": 194704076, + "step": 1541 + }, + { + "epoch": 0.3953055858397999, + "loss": 1.2151434421539307, + "loss_ce": 0.002741089090704918, + "loss_iou": 0.53125, + "loss_num": 0.030517578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 194704076, + "step": 1541 + }, + { + "epoch": 0.3955621112037453, + "grad_norm": 65.10550689697266, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 194830920, + "step": 1542 + }, + { + "epoch": 0.3955621112037453, + "loss": 1.0185145139694214, + "loss_ce": 0.0024011863861232996, + "loss_iou": 0.44921875, + "loss_num": 0.0233154296875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 194830920, + "step": 1542 + }, + { + "epoch": 0.3958186365676906, + "grad_norm": 58.70677947998047, + "learning_rate": 5e-06, + "loss": 1.2802, + "num_input_tokens_seen": 194957768, + "step": 1543 + }, + { + "epoch": 0.3958186365676906, + "loss": 1.0494959354400635, + "loss_ce": 0.0006678862264379859, + "loss_iou": 0.49609375, + "loss_num": 0.01123046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 194957768, + "step": 1543 + }, + { + "epoch": 0.396075161931636, + "grad_norm": 45.512657165527344, + "learning_rate": 5e-06, + "loss": 1.0285, + "num_input_tokens_seen": 195083768, + "step": 1544 + }, + { + "epoch": 0.396075161931636, + "loss": 1.1316341161727905, + "loss_ce": 0.0007747658528387547, + "loss_iou": 0.5234375, + "loss_num": 0.017333984375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 195083768, + "step": 1544 + }, + { + "epoch": 0.3963316872955813, + "grad_norm": 63.16133499145508, + "learning_rate": 5e-06, + "loss": 1.1437, + "num_input_tokens_seen": 195210484, + "step": 1545 + }, + { + "epoch": 0.3963316872955813, + "loss": 1.3586812019348145, + "loss_ce": 0.0046771918423473835, + "loss_iou": 0.625, + "loss_num": 0.021484375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 195210484, + "step": 1545 + }, + { + "epoch": 0.3965882126595267, + "grad_norm": 82.78475189208984, + "learning_rate": 5e-06, + "loss": 1.0666, + "num_input_tokens_seen": 195337036, + "step": 1546 + }, + { + "epoch": 0.3965882126595267, + "loss": 1.1015851497650146, + "loss_ce": 0.000999275827780366, + "loss_iou": 0.50390625, + "loss_num": 0.018310546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 195337036, + "step": 1546 + }, + { + "epoch": 0.3968447380234721, + "grad_norm": 52.03101348876953, + "learning_rate": 5e-06, + "loss": 1.208, + "num_input_tokens_seen": 195463500, + "step": 1547 + }, + { + "epoch": 0.3968447380234721, + "loss": 1.0965516567230225, + "loss_ce": 0.0037782436702400446, + "loss_iou": 0.51953125, + "loss_num": 0.010498046875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 195463500, + "step": 1547 + }, + { + "epoch": 0.3971012633874174, + "grad_norm": 27.089580535888672, + "learning_rate": 5e-06, + "loss": 1.1511, + "num_input_tokens_seen": 195588292, + "step": 1548 + }, + { + "epoch": 0.3971012633874174, + "loss": 1.3309563398361206, + "loss_ce": 0.0033195768482983112, + "loss_iou": 0.5859375, + "loss_num": 0.030517578125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 195588292, + "step": 1548 + }, + { + "epoch": 0.3973577887513628, + "grad_norm": 35.71665573120117, + "learning_rate": 5e-06, + "loss": 1.1771, + "num_input_tokens_seen": 195714520, + "step": 1549 + }, + { + "epoch": 0.3973577887513628, + "loss": 1.2118616104125977, + "loss_ce": 0.003365545067936182, + "loss_iou": 0.55859375, + "loss_num": 0.0181884765625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 195714520, + "step": 1549 + }, + { + "epoch": 0.3976143141153082, + "grad_norm": 46.36763381958008, + "learning_rate": 5e-06, + "loss": 1.0423, + "num_input_tokens_seen": 195842196, + "step": 1550 + }, + { + "epoch": 0.3976143141153082, + "loss": 0.8938024640083313, + "loss_ce": 0.0002477540110703558, + "loss_iou": 0.416015625, + "loss_num": 0.01220703125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 195842196, + "step": 1550 + }, + { + "epoch": 0.3978708394792535, + "grad_norm": 43.3109016418457, + "learning_rate": 5e-06, + "loss": 1.0645, + "num_input_tokens_seen": 195967416, + "step": 1551 + }, + { + "epoch": 0.3978708394792535, + "loss": 1.0396095514297485, + "loss_ce": 0.000547111383639276, + "loss_iou": 0.4921875, + "loss_num": 0.010498046875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 195967416, + "step": 1551 + }, + { + "epoch": 0.3981273648431989, + "grad_norm": 45.22679138183594, + "learning_rate": 5e-06, + "loss": 1.109, + "num_input_tokens_seen": 196093916, + "step": 1552 + }, + { + "epoch": 0.3981273648431989, + "loss": 1.15860116481781, + "loss_ce": 0.0003980428446084261, + "loss_iou": 0.54296875, + "loss_num": 0.01422119140625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 196093916, + "step": 1552 + }, + { + "epoch": 0.3983838902071442, + "grad_norm": 67.62503814697266, + "learning_rate": 5e-06, + "loss": 1.242, + "num_input_tokens_seen": 196220872, + "step": 1553 + }, + { + "epoch": 0.3983838902071442, + "loss": 1.1265982389450073, + "loss_ce": 0.004039607010781765, + "loss_iou": 0.50390625, + "loss_num": 0.0235595703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 196220872, + "step": 1553 + }, + { + "epoch": 0.3986404155710896, + "grad_norm": 39.299259185791016, + "learning_rate": 5e-06, + "loss": 1.2042, + "num_input_tokens_seen": 196346364, + "step": 1554 + }, + { + "epoch": 0.3986404155710896, + "loss": 1.0823338031768799, + "loss_ce": 0.0027439750265330076, + "loss_iou": 0.50390625, + "loss_num": 0.01416015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 196346364, + "step": 1554 + }, + { + "epoch": 0.39889694093503497, + "grad_norm": 24.284473419189453, + "learning_rate": 5e-06, + "loss": 1.0085, + "num_input_tokens_seen": 196471824, + "step": 1555 + }, + { + "epoch": 0.39889694093503497, + "loss": 0.8962870836257935, + "loss_ce": 0.002732346998527646, + "loss_iou": 0.40625, + "loss_num": 0.015869140625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 196471824, + "step": 1555 + }, + { + "epoch": 0.3991534662989803, + "grad_norm": 29.332895278930664, + "learning_rate": 5e-06, + "loss": 1.0541, + "num_input_tokens_seen": 196598144, + "step": 1556 + }, + { + "epoch": 0.3991534662989803, + "loss": 0.8029073476791382, + "loss_ce": 0.0011495501967146993, + "loss_iou": 0.38671875, + "loss_num": 0.005950927734375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 196598144, + "step": 1556 + }, + { + "epoch": 0.3994099916629257, + "grad_norm": 51.90876770019531, + "learning_rate": 5e-06, + "loss": 1.0953, + "num_input_tokens_seen": 196724860, + "step": 1557 + }, + { + "epoch": 0.3994099916629257, + "loss": 1.1668106317520142, + "loss_ce": 0.0007949427817948163, + "loss_iou": 0.52734375, + "loss_num": 0.02197265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 196724860, + "step": 1557 + }, + { + "epoch": 0.399666517026871, + "grad_norm": 53.316673278808594, + "learning_rate": 5e-06, + "loss": 1.1668, + "num_input_tokens_seen": 196851120, + "step": 1558 + }, + { + "epoch": 0.399666517026871, + "loss": 1.181157112121582, + "loss_ce": 0.000492969760671258, + "loss_iou": 0.54296875, + "loss_num": 0.019287109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 196851120, + "step": 1558 + }, + { + "epoch": 0.3999230423908164, + "grad_norm": 70.81504821777344, + "learning_rate": 5e-06, + "loss": 1.0365, + "num_input_tokens_seen": 196977116, + "step": 1559 + }, + { + "epoch": 0.3999230423908164, + "loss": 1.151735782623291, + "loss_ce": 0.005251309368759394, + "loss_iou": 0.52734375, + "loss_num": 0.017578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 196977116, + "step": 1559 + }, + { + "epoch": 0.40017956775476177, + "grad_norm": 63.709285736083984, + "learning_rate": 5e-06, + "loss": 1.2733, + "num_input_tokens_seen": 197102816, + "step": 1560 + }, + { + "epoch": 0.40017956775476177, + "loss": 1.3319530487060547, + "loss_ce": 0.002363146748393774, + "loss_iou": 0.61328125, + "loss_num": 0.0205078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 197102816, + "step": 1560 + }, + { + "epoch": 0.4004360931187071, + "grad_norm": 24.85494613647461, + "learning_rate": 5e-06, + "loss": 1.0136, + "num_input_tokens_seen": 197229292, + "step": 1561 + }, + { + "epoch": 0.4004360931187071, + "loss": 0.9530725479125977, + "loss_ce": 0.00043580314377322793, + "loss_iou": 0.4453125, + "loss_num": 0.01220703125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 197229292, + "step": 1561 + }, + { + "epoch": 0.4006926184826525, + "grad_norm": 38.93208694458008, + "learning_rate": 5e-06, + "loss": 1.024, + "num_input_tokens_seen": 197355308, + "step": 1562 + }, + { + "epoch": 0.4006926184826525, + "loss": 0.915032148361206, + "loss_ce": 0.0009696767665445805, + "loss_iou": 0.4296875, + "loss_num": 0.0107421875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 197355308, + "step": 1562 + }, + { + "epoch": 0.40094914384659786, + "grad_norm": 53.105751037597656, + "learning_rate": 5e-06, + "loss": 1.1505, + "num_input_tokens_seen": 197482276, + "step": 1563 + }, + { + "epoch": 0.40094914384659786, + "loss": 1.196049451828003, + "loss_ce": 0.00415497412905097, + "loss_iou": 0.5390625, + "loss_num": 0.0234375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 197482276, + "step": 1563 + }, + { + "epoch": 0.4012056692105432, + "grad_norm": 62.04239273071289, + "learning_rate": 5e-06, + "loss": 1.1792, + "num_input_tokens_seen": 197609880, + "step": 1564 + }, + { + "epoch": 0.4012056692105432, + "loss": 1.3616387844085693, + "loss_ce": 0.0022638263180851936, + "loss_iou": 0.6015625, + "loss_num": 0.0311279296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 197609880, + "step": 1564 + }, + { + "epoch": 0.40146219457448856, + "grad_norm": 98.88211822509766, + "learning_rate": 5e-06, + "loss": 1.195, + "num_input_tokens_seen": 197735528, + "step": 1565 + }, + { + "epoch": 0.40146219457448856, + "loss": 1.1786649227142334, + "loss_ce": 0.0014189048670232296, + "loss_iou": 0.546875, + "loss_num": 0.0172119140625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 197735528, + "step": 1565 + }, + { + "epoch": 0.4017187199384339, + "grad_norm": 54.32612609863281, + "learning_rate": 5e-06, + "loss": 1.3069, + "num_input_tokens_seen": 197862352, + "step": 1566 + }, + { + "epoch": 0.4017187199384339, + "loss": 1.5224307775497437, + "loss_ce": 0.005829181522130966, + "loss_iou": 0.6796875, + "loss_num": 0.03173828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 197862352, + "step": 1566 + }, + { + "epoch": 0.40197524530237927, + "grad_norm": 38.99921417236328, + "learning_rate": 5e-06, + "loss": 1.0473, + "num_input_tokens_seen": 197988876, + "step": 1567 + }, + { + "epoch": 0.40197524530237927, + "loss": 1.09300696849823, + "loss_ce": 0.002674960531294346, + "loss_iou": 0.494140625, + "loss_num": 0.020263671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 197988876, + "step": 1567 + }, + { + "epoch": 0.40223177066632465, + "grad_norm": 46.888946533203125, + "learning_rate": 5e-06, + "loss": 1.1427, + "num_input_tokens_seen": 198114624, + "step": 1568 + }, + { + "epoch": 0.40223177066632465, + "loss": 1.4389984607696533, + "loss_ce": 0.001498574623838067, + "loss_iou": 0.6171875, + "loss_num": 0.04052734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 198114624, + "step": 1568 + }, + { + "epoch": 0.40248829603027, + "grad_norm": 45.29999923706055, + "learning_rate": 5e-06, + "loss": 1.0794, + "num_input_tokens_seen": 198241208, + "step": 1569 + }, + { + "epoch": 0.40248829603027, + "loss": 0.9347891807556152, + "loss_ce": 0.0031485140789300203, + "loss_iou": 0.439453125, + "loss_num": 0.01043701171875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 198241208, + "step": 1569 + }, + { + "epoch": 0.40274482139421536, + "grad_norm": 51.33820724487305, + "learning_rate": 5e-06, + "loss": 1.166, + "num_input_tokens_seen": 198367280, + "step": 1570 + }, + { + "epoch": 0.40274482139421536, + "loss": 1.223694920539856, + "loss_ce": 0.004456730093806982, + "loss_iou": 0.55859375, + "loss_num": 0.0205078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 198367280, + "step": 1570 + }, + { + "epoch": 0.40300134675816074, + "grad_norm": 61.1533088684082, + "learning_rate": 5e-06, + "loss": 1.1844, + "num_input_tokens_seen": 198493992, + "step": 1571 + }, + { + "epoch": 0.40300134675816074, + "loss": 1.2196438312530518, + "loss_ce": 0.0013821612810716033, + "loss_iou": 0.56640625, + "loss_num": 0.0172119140625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 198493992, + "step": 1571 + }, + { + "epoch": 0.40325787212210606, + "grad_norm": 78.06536865234375, + "learning_rate": 5e-06, + "loss": 1.2488, + "num_input_tokens_seen": 198620276, + "step": 1572 + }, + { + "epoch": 0.40325787212210606, + "loss": 1.4699575901031494, + "loss_ce": 0.005602159537374973, + "loss_iou": 0.64453125, + "loss_num": 0.03515625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 198620276, + "step": 1572 + }, + { + "epoch": 0.40351439748605145, + "grad_norm": 48.317047119140625, + "learning_rate": 5e-06, + "loss": 1.0406, + "num_input_tokens_seen": 198745900, + "step": 1573 + }, + { + "epoch": 0.40351439748605145, + "loss": 1.0384116172790527, + "loss_ce": 0.0008139099809341133, + "loss_iou": 0.4921875, + "loss_num": 0.01007080078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 198745900, + "step": 1573 + }, + { + "epoch": 0.40377092284999677, + "grad_norm": 34.022945404052734, + "learning_rate": 5e-06, + "loss": 1.1458, + "num_input_tokens_seen": 198872064, + "step": 1574 + }, + { + "epoch": 0.40377092284999677, + "loss": 1.1450846195220947, + "loss_ce": 0.004947975743561983, + "loss_iou": 0.515625, + "loss_num": 0.021240234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 198872064, + "step": 1574 + }, + { + "epoch": 0.40402744821394215, + "grad_norm": 40.52445983886719, + "learning_rate": 5e-06, + "loss": 0.9889, + "num_input_tokens_seen": 198999540, + "step": 1575 + }, + { + "epoch": 0.40402744821394215, + "loss": 0.9268763661384583, + "loss_ce": 0.001095124171115458, + "loss_iou": 0.439453125, + "loss_num": 0.009033203125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 198999540, + "step": 1575 + }, + { + "epoch": 0.40428397357788753, + "grad_norm": 65.02236938476562, + "learning_rate": 5e-06, + "loss": 1.0033, + "num_input_tokens_seen": 199127280, + "step": 1576 + }, + { + "epoch": 0.40428397357788753, + "loss": 1.0825482606887817, + "loss_ce": 0.00344668235629797, + "loss_iou": 0.494140625, + "loss_num": 0.01806640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 199127280, + "step": 1576 + }, + { + "epoch": 0.40454049894183286, + "grad_norm": 56.176116943359375, + "learning_rate": 5e-06, + "loss": 1.1199, + "num_input_tokens_seen": 199254448, + "step": 1577 + }, + { + "epoch": 0.40454049894183286, + "loss": 1.0419844388961792, + "loss_ce": 0.0029219682328402996, + "loss_iou": 0.494140625, + "loss_num": 0.01019287109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 199254448, + "step": 1577 + }, + { + "epoch": 0.40479702430577824, + "grad_norm": 46.91203308105469, + "learning_rate": 5e-06, + "loss": 1.1591, + "num_input_tokens_seen": 199379984, + "step": 1578 + }, + { + "epoch": 0.40479702430577824, + "loss": 1.2087640762329102, + "loss_ce": 0.003197699785232544, + "loss_iou": 0.546875, + "loss_num": 0.022705078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 199379984, + "step": 1578 + }, + { + "epoch": 0.40505354966972357, + "grad_norm": 45.60390853881836, + "learning_rate": 5e-06, + "loss": 1.1384, + "num_input_tokens_seen": 199505764, + "step": 1579 + }, + { + "epoch": 0.40505354966972357, + "loss": 1.2366540431976318, + "loss_ce": 0.0008141376893036067, + "loss_iou": 0.57421875, + "loss_num": 0.017822265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 199505764, + "step": 1579 + }, + { + "epoch": 0.40531007503366895, + "grad_norm": 68.7022476196289, + "learning_rate": 5e-06, + "loss": 1.1007, + "num_input_tokens_seen": 199632140, + "step": 1580 + }, + { + "epoch": 0.40531007503366895, + "loss": 1.1716147661209106, + "loss_ce": 0.00022805578191764653, + "loss_iou": 0.5390625, + "loss_num": 0.019287109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 199632140, + "step": 1580 + }, + { + "epoch": 0.40556660039761433, + "grad_norm": 53.40279006958008, + "learning_rate": 5e-06, + "loss": 1.1485, + "num_input_tokens_seen": 199758440, + "step": 1581 + }, + { + "epoch": 0.40556660039761433, + "loss": 1.069993495941162, + "loss_ce": 0.0011458214139565825, + "loss_iou": 0.5, + "loss_num": 0.01361083984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 199758440, + "step": 1581 + }, + { + "epoch": 0.40582312576155966, + "grad_norm": 53.18407440185547, + "learning_rate": 5e-06, + "loss": 0.9958, + "num_input_tokens_seen": 199885324, + "step": 1582 + }, + { + "epoch": 0.40582312576155966, + "loss": 0.8628374338150024, + "loss_ce": 0.0002885780995711684, + "loss_iou": 0.412109375, + "loss_num": 0.007476806640625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 199885324, + "step": 1582 + }, + { + "epoch": 0.40607965112550504, + "grad_norm": 41.17107391357422, + "learning_rate": 5e-06, + "loss": 1.0769, + "num_input_tokens_seen": 200011144, + "step": 1583 + }, + { + "epoch": 0.40607965112550504, + "loss": 1.1172876358032227, + "loss_ce": 0.0015650223940610886, + "loss_iou": 0.5, + "loss_num": 0.0228271484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 200011144, + "step": 1583 + }, + { + "epoch": 0.4063361764894504, + "grad_norm": 69.88388061523438, + "learning_rate": 5e-06, + "loss": 1.0976, + "num_input_tokens_seen": 200136852, + "step": 1584 + }, + { + "epoch": 0.4063361764894504, + "loss": 1.15824294090271, + "loss_ce": 0.0015046991175040603, + "loss_iou": 0.5390625, + "loss_num": 0.0157470703125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 200136852, + "step": 1584 + }, + { + "epoch": 0.40659270185339574, + "grad_norm": 51.52262496948242, + "learning_rate": 5e-06, + "loss": 1.22, + "num_input_tokens_seen": 200263788, + "step": 1585 + }, + { + "epoch": 0.40659270185339574, + "loss": 1.2661281824111938, + "loss_ce": 0.001479707658290863, + "loss_iou": 0.5859375, + "loss_num": 0.0181884765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 200263788, + "step": 1585 + }, + { + "epoch": 0.4068492272173411, + "grad_norm": 45.43435287475586, + "learning_rate": 5e-06, + "loss": 1.0949, + "num_input_tokens_seen": 200389908, + "step": 1586 + }, + { + "epoch": 0.4068492272173411, + "loss": 1.2181625366210938, + "loss_ce": 0.0018539582379162312, + "loss_iou": 0.5390625, + "loss_num": 0.027099609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 200389908, + "step": 1586 + }, + { + "epoch": 0.40710575258128645, + "grad_norm": 35.7471923828125, + "learning_rate": 5e-06, + "loss": 1.0233, + "num_input_tokens_seen": 200515100, + "step": 1587 + }, + { + "epoch": 0.40710575258128645, + "loss": 1.0286756753921509, + "loss_ce": 0.0003553498536348343, + "loss_iou": 0.482421875, + "loss_num": 0.0130615234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 200515100, + "step": 1587 + }, + { + "epoch": 0.40736227794523183, + "grad_norm": 40.84055709838867, + "learning_rate": 5e-06, + "loss": 1.1475, + "num_input_tokens_seen": 200641964, + "step": 1588 + }, + { + "epoch": 0.40736227794523183, + "loss": 1.4442625045776367, + "loss_ce": 0.0028562629595398903, + "loss_iou": 0.6328125, + "loss_num": 0.03466796875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 200641964, + "step": 1588 + }, + { + "epoch": 0.4076188033091772, + "grad_norm": 60.79541015625, + "learning_rate": 5e-06, + "loss": 1.0728, + "num_input_tokens_seen": 200766608, + "step": 1589 + }, + { + "epoch": 0.4076188033091772, + "loss": 1.1367096900939941, + "loss_ce": 0.0019440683536231518, + "loss_iou": 0.52734375, + "loss_num": 0.01544189453125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 200766608, + "step": 1589 + }, + { + "epoch": 0.40787532867312254, + "grad_norm": 48.42425537109375, + "learning_rate": 5e-06, + "loss": 1.1214, + "num_input_tokens_seen": 200892912, + "step": 1590 + }, + { + "epoch": 0.40787532867312254, + "loss": 1.2832303047180176, + "loss_ce": 0.0010037871543318033, + "loss_iou": 0.60546875, + "loss_num": 0.014404296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 200892912, + "step": 1590 + }, + { + "epoch": 0.4081318540370679, + "grad_norm": 69.77813720703125, + "learning_rate": 5e-06, + "loss": 1.0308, + "num_input_tokens_seen": 201019296, + "step": 1591 + }, + { + "epoch": 0.4081318540370679, + "loss": 0.9903916716575623, + "loss_ce": 0.0006455503171309829, + "loss_iou": 0.4609375, + "loss_num": 0.0135498046875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 201019296, + "step": 1591 + }, + { + "epoch": 0.4083883794010133, + "grad_norm": 53.29359817504883, + "learning_rate": 5e-06, + "loss": 1.1744, + "num_input_tokens_seen": 201146004, + "step": 1592 + }, + { + "epoch": 0.4083883794010133, + "loss": 1.1521239280700684, + "loss_ce": 0.0007567479624412954, + "loss_iou": 0.54296875, + "loss_num": 0.01300048828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 201146004, + "step": 1592 + }, + { + "epoch": 0.4086449047649586, + "grad_norm": 32.17792510986328, + "learning_rate": 5e-06, + "loss": 1.1535, + "num_input_tokens_seen": 201271968, + "step": 1593 + }, + { + "epoch": 0.4086449047649586, + "loss": 1.0849266052246094, + "loss_ce": 0.00094219931634143, + "loss_iou": 0.50390625, + "loss_num": 0.0150146484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 201271968, + "step": 1593 + }, + { + "epoch": 0.408901430128904, + "grad_norm": 50.247650146484375, + "learning_rate": 5e-06, + "loss": 1.0772, + "num_input_tokens_seen": 201397284, + "step": 1594 + }, + { + "epoch": 0.408901430128904, + "loss": 1.0865612030029297, + "loss_ce": 0.0016002136981114745, + "loss_iou": 0.50390625, + "loss_num": 0.0157470703125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 201397284, + "step": 1594 + }, + { + "epoch": 0.40915795549284933, + "grad_norm": 49.74248123168945, + "learning_rate": 5e-06, + "loss": 1.1077, + "num_input_tokens_seen": 201523388, + "step": 1595 + }, + { + "epoch": 0.40915795549284933, + "loss": 0.9415378570556641, + "loss_ce": 0.0011081373086199164, + "loss_iou": 0.443359375, + "loss_num": 0.01055908203125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 201523388, + "step": 1595 + }, + { + "epoch": 0.4094144808567947, + "grad_norm": 55.61701202392578, + "learning_rate": 5e-06, + "loss": 0.9612, + "num_input_tokens_seen": 201649944, + "step": 1596 + }, + { + "epoch": 0.4094144808567947, + "loss": 0.9368847012519836, + "loss_ce": 0.0018261116929352283, + "loss_iou": 0.423828125, + "loss_num": 0.017333984375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 201649944, + "step": 1596 + }, + { + "epoch": 0.4096710062207401, + "grad_norm": 54.47943878173828, + "learning_rate": 5e-06, + "loss": 1.2131, + "num_input_tokens_seen": 201776168, + "step": 1597 + }, + { + "epoch": 0.4096710062207401, + "loss": 1.2258821725845337, + "loss_ce": 0.001761116785928607, + "loss_iou": 0.56640625, + "loss_num": 0.0186767578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 201776168, + "step": 1597 + }, + { + "epoch": 0.4099275315846854, + "grad_norm": 56.75238800048828, + "learning_rate": 5e-06, + "loss": 1.0234, + "num_input_tokens_seen": 201901948, + "step": 1598 + }, + { + "epoch": 0.4099275315846854, + "loss": 0.9030282497406006, + "loss_ce": 0.0006845340831205249, + "loss_iou": 0.419921875, + "loss_num": 0.0123291015625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 201901948, + "step": 1598 + }, + { + "epoch": 0.4101840569486308, + "grad_norm": 53.9236946105957, + "learning_rate": 5e-06, + "loss": 1.1868, + "num_input_tokens_seen": 202028004, + "step": 1599 + }, + { + "epoch": 0.4101840569486308, + "loss": 1.1269316673278809, + "loss_ce": 0.0019317497499287128, + "loss_iou": 0.51953125, + "loss_num": 0.0174560546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 202028004, + "step": 1599 + }, + { + "epoch": 0.41044058231257613, + "grad_norm": 69.86937713623047, + "learning_rate": 5e-06, + "loss": 1.1545, + "num_input_tokens_seen": 202153988, + "step": 1600 + }, + { + "epoch": 0.41044058231257613, + "loss": 1.1669769287109375, + "loss_ce": 0.0004729431529995054, + "loss_iou": 0.54296875, + "loss_num": 0.01519775390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 202153988, + "step": 1600 + }, + { + "epoch": 0.4106971076765215, + "grad_norm": 53.41269302368164, + "learning_rate": 5e-06, + "loss": 1.1701, + "num_input_tokens_seen": 202281020, + "step": 1601 + }, + { + "epoch": 0.4106971076765215, + "loss": 1.2026866674423218, + "loss_ce": 0.005421060137450695, + "loss_iou": 0.5546875, + "loss_num": 0.017333984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 202281020, + "step": 1601 + }, + { + "epoch": 0.4109536330404669, + "grad_norm": 41.376346588134766, + "learning_rate": 5e-06, + "loss": 1.015, + "num_input_tokens_seen": 202408136, + "step": 1602 + }, + { + "epoch": 0.4109536330404669, + "loss": 0.8737397193908691, + "loss_ce": 0.0011811305303126574, + "loss_iou": 0.416015625, + "loss_num": 0.0078125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 202408136, + "step": 1602 + }, + { + "epoch": 0.4112101584044122, + "grad_norm": 38.93073272705078, + "learning_rate": 5e-06, + "loss": 0.9689, + "num_input_tokens_seen": 202534148, + "step": 1603 + }, + { + "epoch": 0.4112101584044122, + "loss": 0.7529886960983276, + "loss_ce": 0.0005472886259667575, + "loss_iou": 0.361328125, + "loss_num": 0.006256103515625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 202534148, + "step": 1603 + }, + { + "epoch": 0.4114666837683576, + "grad_norm": 75.21710205078125, + "learning_rate": 5e-06, + "loss": 0.98, + "num_input_tokens_seen": 202659680, + "step": 1604 + }, + { + "epoch": 0.4114666837683576, + "loss": 0.9929934740066528, + "loss_ce": 0.0008059820393100381, + "loss_iou": 0.458984375, + "loss_num": 0.01507568359375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 202659680, + "step": 1604 + }, + { + "epoch": 0.411723209132303, + "grad_norm": 53.58226776123047, + "learning_rate": 5e-06, + "loss": 1.1772, + "num_input_tokens_seen": 202785648, + "step": 1605 + }, + { + "epoch": 0.411723209132303, + "loss": 1.0735442638397217, + "loss_ce": 0.0007904045050963759, + "loss_iou": 0.5078125, + "loss_num": 0.01220703125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 202785648, + "step": 1605 + }, + { + "epoch": 0.4119797344962483, + "grad_norm": 31.25186538696289, + "learning_rate": 5e-06, + "loss": 1.0329, + "num_input_tokens_seen": 202912300, + "step": 1606 + }, + { + "epoch": 0.4119797344962483, + "loss": 0.8776917457580566, + "loss_ce": 0.0007385924109257758, + "loss_iou": 0.419921875, + "loss_num": 0.00726318359375, + "loss_xval": 0.875, + "num_input_tokens_seen": 202912300, + "step": 1606 + }, + { + "epoch": 0.4122362598601937, + "grad_norm": 45.73114776611328, + "learning_rate": 5e-06, + "loss": 1.0816, + "num_input_tokens_seen": 203038364, + "step": 1607 + }, + { + "epoch": 0.4122362598601937, + "loss": 1.0132834911346436, + "loss_ce": 0.00034398509887978435, + "loss_iou": 0.458984375, + "loss_num": 0.0194091796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 203038364, + "step": 1607 + }, + { + "epoch": 0.412492785224139, + "grad_norm": 59.52323532104492, + "learning_rate": 5e-06, + "loss": 0.9806, + "num_input_tokens_seen": 203166004, + "step": 1608 + }, + { + "epoch": 0.412492785224139, + "loss": 1.1055126190185547, + "loss_ce": 0.0015087572392076254, + "loss_iou": 0.51953125, + "loss_num": 0.01336669921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 203166004, + "step": 1608 + }, + { + "epoch": 0.4127493105880844, + "grad_norm": 65.99946594238281, + "learning_rate": 5e-06, + "loss": 1.1221, + "num_input_tokens_seen": 203292576, + "step": 1609 + }, + { + "epoch": 0.4127493105880844, + "loss": 1.1487829685211182, + "loss_ce": 0.002298548351973295, + "loss_iou": 0.5234375, + "loss_num": 0.019775390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 203292576, + "step": 1609 + }, + { + "epoch": 0.4130058359520298, + "grad_norm": 70.04087829589844, + "learning_rate": 5e-06, + "loss": 1.1481, + "num_input_tokens_seen": 203419412, + "step": 1610 + }, + { + "epoch": 0.4130058359520298, + "loss": 1.013358473777771, + "loss_ce": 0.0006632603472098708, + "loss_iou": 0.46875, + "loss_num": 0.01531982421875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 203419412, + "step": 1610 + }, + { + "epoch": 0.4132623613159751, + "grad_norm": 56.5489616394043, + "learning_rate": 5e-06, + "loss": 1.0291, + "num_input_tokens_seen": 203545660, + "step": 1611 + }, + { + "epoch": 0.4132623613159751, + "loss": 1.0631475448608398, + "loss_ce": 0.003088895697146654, + "loss_iou": 0.5, + "loss_num": 0.0120849609375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 203545660, + "step": 1611 + }, + { + "epoch": 0.4135188866799205, + "grad_norm": 52.033451080322266, + "learning_rate": 5e-06, + "loss": 1.0914, + "num_input_tokens_seen": 203672084, + "step": 1612 + }, + { + "epoch": 0.4135188866799205, + "loss": 1.0592870712280273, + "loss_ce": 0.0006933377590030432, + "loss_iou": 0.494140625, + "loss_num": 0.0140380859375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 203672084, + "step": 1612 + }, + { + "epoch": 0.41377541204386586, + "grad_norm": 71.56627655029297, + "learning_rate": 5e-06, + "loss": 1.1175, + "num_input_tokens_seen": 203799028, + "step": 1613 + }, + { + "epoch": 0.41377541204386586, + "loss": 0.9986370205879211, + "loss_ce": 0.00010188800661126152, + "loss_iou": 0.466796875, + "loss_num": 0.012939453125, + "loss_xval": 1.0, + "num_input_tokens_seen": 203799028, + "step": 1613 + }, + { + "epoch": 0.4140319374078112, + "grad_norm": 55.18699264526367, + "learning_rate": 5e-06, + "loss": 1.2273, + "num_input_tokens_seen": 203925644, + "step": 1614 + }, + { + "epoch": 0.4140319374078112, + "loss": 1.0516520738601685, + "loss_ce": 0.0003825381863862276, + "loss_iou": 0.49609375, + "loss_num": 0.0120849609375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 203925644, + "step": 1614 + }, + { + "epoch": 0.41428846277175657, + "grad_norm": 42.202938079833984, + "learning_rate": 5e-06, + "loss": 1.1336, + "num_input_tokens_seen": 204052460, + "step": 1615 + }, + { + "epoch": 0.41428846277175657, + "loss": 1.2043044567108154, + "loss_ce": 0.002156055998057127, + "loss_iou": 0.546875, + "loss_num": 0.022216796875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 204052460, + "step": 1615 + }, + { + "epoch": 0.4145449881357019, + "grad_norm": 57.70219421386719, + "learning_rate": 5e-06, + "loss": 1.1603, + "num_input_tokens_seen": 204179520, + "step": 1616 + }, + { + "epoch": 0.4145449881357019, + "loss": 1.248015284538269, + "loss_ce": 0.0009449715726077557, + "loss_iou": 0.5703125, + "loss_num": 0.0211181640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 204179520, + "step": 1616 + }, + { + "epoch": 0.4148015134996473, + "grad_norm": 77.36762237548828, + "learning_rate": 5e-06, + "loss": 1.1801, + "num_input_tokens_seen": 204306420, + "step": 1617 + }, + { + "epoch": 0.4148015134996473, + "loss": 1.20962393283844, + "loss_ce": 0.0006395644741132855, + "loss_iou": 0.5625, + "loss_num": 0.0172119140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 204306420, + "step": 1617 + }, + { + "epoch": 0.41505803886359266, + "grad_norm": 56.69791793823242, + "learning_rate": 5e-06, + "loss": 1.031, + "num_input_tokens_seen": 204431660, + "step": 1618 + }, + { + "epoch": 0.41505803886359266, + "loss": 1.0564846992492676, + "loss_ce": 0.0003323976998217404, + "loss_iou": 0.50390625, + "loss_num": 0.010009765625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 204431660, + "step": 1618 + }, + { + "epoch": 0.415314564227538, + "grad_norm": 39.941017150878906, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 204557480, + "step": 1619 + }, + { + "epoch": 0.415314564227538, + "loss": 1.0838923454284668, + "loss_ce": 0.0008845559787005186, + "loss_iou": 0.51171875, + "loss_num": 0.01214599609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 204557480, + "step": 1619 + }, + { + "epoch": 0.41557108959148337, + "grad_norm": 47.552310943603516, + "learning_rate": 5e-06, + "loss": 1.122, + "num_input_tokens_seen": 204683312, + "step": 1620 + }, + { + "epoch": 0.41557108959148337, + "loss": 1.0735079050064087, + "loss_ce": 0.0012423183070495725, + "loss_iou": 0.5, + "loss_num": 0.0145263671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 204683312, + "step": 1620 + }, + { + "epoch": 0.4158276149554287, + "grad_norm": 89.33126831054688, + "learning_rate": 5e-06, + "loss": 1.0803, + "num_input_tokens_seen": 204810332, + "step": 1621 + }, + { + "epoch": 0.4158276149554287, + "loss": 1.2124977111816406, + "loss_ce": 0.002048431197181344, + "loss_iou": 0.5546875, + "loss_num": 0.02099609375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 204810332, + "step": 1621 + }, + { + "epoch": 0.4160841403193741, + "grad_norm": 49.66191864013672, + "learning_rate": 5e-06, + "loss": 1.3202, + "num_input_tokens_seen": 204936580, + "step": 1622 + }, + { + "epoch": 0.4160841403193741, + "loss": 1.361793875694275, + "loss_ce": 0.001442310749553144, + "loss_iou": 0.62890625, + "loss_num": 0.0205078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 204936580, + "step": 1622 + }, + { + "epoch": 0.41634066568331946, + "grad_norm": 46.02541732788086, + "learning_rate": 5e-06, + "loss": 1.1219, + "num_input_tokens_seen": 205062980, + "step": 1623 + }, + { + "epoch": 0.41634066568331946, + "loss": 1.0990922451019287, + "loss_ce": 0.0004594980855472386, + "loss_iou": 0.51171875, + "loss_num": 0.0146484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 205062980, + "step": 1623 + }, + { + "epoch": 0.4165971910472648, + "grad_norm": 52.34889602661133, + "learning_rate": 5e-06, + "loss": 1.2072, + "num_input_tokens_seen": 205188652, + "step": 1624 + }, + { + "epoch": 0.4165971910472648, + "loss": 1.1092159748077393, + "loss_ce": 0.004235539119690657, + "loss_iou": 0.5078125, + "loss_num": 0.018310546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 205188652, + "step": 1624 + }, + { + "epoch": 0.41685371641121016, + "grad_norm": 60.65018081665039, + "learning_rate": 5e-06, + "loss": 1.1441, + "num_input_tokens_seen": 205315476, + "step": 1625 + }, + { + "epoch": 0.41685371641121016, + "loss": 1.2122611999511719, + "loss_ce": 0.0003471853560768068, + "loss_iou": 0.57421875, + "loss_num": 0.0123291015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 205315476, + "step": 1625 + }, + { + "epoch": 0.41711024177515554, + "grad_norm": 100.54743957519531, + "learning_rate": 5e-06, + "loss": 1.1206, + "num_input_tokens_seen": 205442692, + "step": 1626 + }, + { + "epoch": 0.41711024177515554, + "loss": 1.1039631366729736, + "loss_ce": 0.0038655202370136976, + "loss_iou": 0.5078125, + "loss_num": 0.016845703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 205442692, + "step": 1626 + }, + { + "epoch": 0.41736676713910087, + "grad_norm": 48.566585540771484, + "learning_rate": 5e-06, + "loss": 1.2676, + "num_input_tokens_seen": 205569612, + "step": 1627 + }, + { + "epoch": 0.41736676713910087, + "loss": 1.0438179969787598, + "loss_ce": 0.0003609832201618701, + "loss_iou": 0.48046875, + "loss_num": 0.0167236328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 205569612, + "step": 1627 + }, + { + "epoch": 0.41762329250304625, + "grad_norm": 40.65486526489258, + "learning_rate": 5e-06, + "loss": 1.1021, + "num_input_tokens_seen": 205695848, + "step": 1628 + }, + { + "epoch": 0.41762329250304625, + "loss": 0.9678932428359985, + "loss_ce": 0.0015846036840230227, + "loss_iou": 0.451171875, + "loss_num": 0.012939453125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 205695848, + "step": 1628 + }, + { + "epoch": 0.4178798178669916, + "grad_norm": 33.806114196777344, + "learning_rate": 5e-06, + "loss": 1.1296, + "num_input_tokens_seen": 205821864, + "step": 1629 + }, + { + "epoch": 0.4178798178669916, + "loss": 1.001373291015625, + "loss_ce": 0.00381465838290751, + "loss_iou": 0.46484375, + "loss_num": 0.01348876953125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 205821864, + "step": 1629 + }, + { + "epoch": 0.41813634323093696, + "grad_norm": 44.681095123291016, + "learning_rate": 5e-06, + "loss": 1.1251, + "num_input_tokens_seen": 205948028, + "step": 1630 + }, + { + "epoch": 0.41813634323093696, + "loss": 1.3766072988510132, + "loss_ce": 0.006490145344287157, + "loss_iou": 0.625, + "loss_num": 0.0235595703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 205948028, + "step": 1630 + }, + { + "epoch": 0.41839286859488234, + "grad_norm": 64.68565368652344, + "learning_rate": 5e-06, + "loss": 1.1959, + "num_input_tokens_seen": 206074620, + "step": 1631 + }, + { + "epoch": 0.41839286859488234, + "loss": 1.1151930093765259, + "loss_ce": 0.0019117454066872597, + "loss_iou": 0.51171875, + "loss_num": 0.0179443359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 206074620, + "step": 1631 + }, + { + "epoch": 0.41864939395882766, + "grad_norm": 74.66930389404297, + "learning_rate": 5e-06, + "loss": 1.0981, + "num_input_tokens_seen": 206201020, + "step": 1632 + }, + { + "epoch": 0.41864939395882766, + "loss": 1.0806163549423218, + "loss_ce": 0.0010265437886118889, + "loss_iou": 0.51171875, + "loss_num": 0.0108642578125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 206201020, + "step": 1632 + }, + { + "epoch": 0.41890591932277305, + "grad_norm": 44.213932037353516, + "learning_rate": 5e-06, + "loss": 1.0272, + "num_input_tokens_seen": 206327884, + "step": 1633 + }, + { + "epoch": 0.41890591932277305, + "loss": 1.1376762390136719, + "loss_ce": 0.0009575064177624881, + "loss_iou": 0.51953125, + "loss_num": 0.0189208984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 206327884, + "step": 1633 + }, + { + "epoch": 0.41916244468671837, + "grad_norm": 49.7847900390625, + "learning_rate": 5e-06, + "loss": 1.1589, + "num_input_tokens_seen": 206453908, + "step": 1634 + }, + { + "epoch": 0.41916244468671837, + "loss": 1.195236086845398, + "loss_ce": 0.002853353973478079, + "loss_iou": 0.54296875, + "loss_num": 0.0211181640625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 206453908, + "step": 1634 + }, + { + "epoch": 0.41941897005066375, + "grad_norm": 48.77326583862305, + "learning_rate": 5e-06, + "loss": 1.0922, + "num_input_tokens_seen": 206579560, + "step": 1635 + }, + { + "epoch": 0.41941897005066375, + "loss": 1.1942617893218994, + "loss_ce": 0.001879069022834301, + "loss_iou": 0.56640625, + "loss_num": 0.0123291015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 206579560, + "step": 1635 + }, + { + "epoch": 0.41967549541460913, + "grad_norm": 59.41028594970703, + "learning_rate": 5e-06, + "loss": 0.9281, + "num_input_tokens_seen": 206705940, + "step": 1636 + }, + { + "epoch": 0.41967549541460913, + "loss": 0.9515740871429443, + "loss_ce": 0.0004021739587187767, + "loss_iou": 0.455078125, + "loss_num": 0.0084228515625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 206705940, + "step": 1636 + }, + { + "epoch": 0.41993202077855446, + "grad_norm": 74.77178192138672, + "learning_rate": 5e-06, + "loss": 1.1077, + "num_input_tokens_seen": 206832216, + "step": 1637 + }, + { + "epoch": 0.41993202077855446, + "loss": 1.1981807947158813, + "loss_ce": 0.0009152039419859648, + "loss_iou": 0.55078125, + "loss_num": 0.0191650390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 206832216, + "step": 1637 + }, + { + "epoch": 0.42018854614249984, + "grad_norm": 58.300376892089844, + "learning_rate": 5e-06, + "loss": 1.1902, + "num_input_tokens_seen": 206959228, + "step": 1638 + }, + { + "epoch": 0.42018854614249984, + "loss": 1.238316535949707, + "loss_ce": 0.001011868822388351, + "loss_iou": 0.56640625, + "loss_num": 0.02001953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 206959228, + "step": 1638 + }, + { + "epoch": 0.4204450715064452, + "grad_norm": 68.75013732910156, + "learning_rate": 5e-06, + "loss": 0.9584, + "num_input_tokens_seen": 207085820, + "step": 1639 + }, + { + "epoch": 0.4204450715064452, + "loss": 0.9124757051467896, + "loss_ce": 0.00036630802787840366, + "loss_iou": 0.427734375, + "loss_num": 0.01129150390625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 207085820, + "step": 1639 + }, + { + "epoch": 0.42070159687039055, + "grad_norm": 51.79212188720703, + "learning_rate": 5e-06, + "loss": 1.2161, + "num_input_tokens_seen": 207211836, + "step": 1640 + }, + { + "epoch": 0.42070159687039055, + "loss": 1.104777455329895, + "loss_ce": 0.0007735765539109707, + "loss_iou": 0.5234375, + "loss_num": 0.01080322265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 207211836, + "step": 1640 + }, + { + "epoch": 0.42095812223433593, + "grad_norm": 33.48579406738281, + "learning_rate": 5e-06, + "loss": 1.0281, + "num_input_tokens_seen": 207339856, + "step": 1641 + }, + { + "epoch": 0.42095812223433593, + "loss": 0.9371021389961243, + "loss_ce": 0.0020435622427612543, + "loss_iou": 0.4296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 207339856, + "step": 1641 + }, + { + "epoch": 0.42121464759828126, + "grad_norm": 52.31923294067383, + "learning_rate": 5e-06, + "loss": 1.1891, + "num_input_tokens_seen": 207467044, + "step": 1642 + }, + { + "epoch": 0.42121464759828126, + "loss": 1.095590353012085, + "loss_ce": 0.0018403513822704554, + "loss_iou": 0.515625, + "loss_num": 0.012451171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 207467044, + "step": 1642 + }, + { + "epoch": 0.42147117296222664, + "grad_norm": 104.14488983154297, + "learning_rate": 5e-06, + "loss": 1.0728, + "num_input_tokens_seen": 207593952, + "step": 1643 + }, + { + "epoch": 0.42147117296222664, + "loss": 1.0262861251831055, + "loss_ce": 0.003825117601081729, + "loss_iou": 0.47265625, + "loss_num": 0.015625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 207593952, + "step": 1643 + }, + { + "epoch": 0.421727698326172, + "grad_norm": 56.339027404785156, + "learning_rate": 5e-06, + "loss": 1.1859, + "num_input_tokens_seen": 207719980, + "step": 1644 + }, + { + "epoch": 0.421727698326172, + "loss": 1.3872721195220947, + "loss_ce": 0.0010417889570817351, + "loss_iou": 0.62890625, + "loss_num": 0.026123046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 207719980, + "step": 1644 + }, + { + "epoch": 0.42198422369011734, + "grad_norm": 39.85120391845703, + "learning_rate": 5e-06, + "loss": 1.1114, + "num_input_tokens_seen": 207847076, + "step": 1645 + }, + { + "epoch": 0.42198422369011734, + "loss": 1.0069575309753418, + "loss_ce": 0.001098224543966353, + "loss_iou": 0.46484375, + "loss_num": 0.0152587890625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 207847076, + "step": 1645 + }, + { + "epoch": 0.4222407490540627, + "grad_norm": 75.31285858154297, + "learning_rate": 5e-06, + "loss": 1.0876, + "num_input_tokens_seen": 207972640, + "step": 1646 + }, + { + "epoch": 0.4222407490540627, + "loss": 1.1401056051254272, + "loss_ce": 0.00045717734610661864, + "loss_iou": 0.53515625, + "loss_num": 0.014404296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 207972640, + "step": 1646 + }, + { + "epoch": 0.4224972744180081, + "grad_norm": 49.89292526245117, + "learning_rate": 5e-06, + "loss": 0.9842, + "num_input_tokens_seen": 208098256, + "step": 1647 + }, + { + "epoch": 0.4224972744180081, + "loss": 0.9398205280303955, + "loss_ce": 0.003297131508588791, + "loss_iou": 0.44140625, + "loss_num": 0.010498046875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 208098256, + "step": 1647 + }, + { + "epoch": 0.42275379978195343, + "grad_norm": 38.94388961791992, + "learning_rate": 5e-06, + "loss": 1.0707, + "num_input_tokens_seen": 208223560, + "step": 1648 + }, + { + "epoch": 0.42275379978195343, + "loss": 1.025101900100708, + "loss_ce": 0.0006878477288410068, + "loss_iou": 0.486328125, + "loss_num": 0.01068115234375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 208223560, + "step": 1648 + }, + { + "epoch": 0.4230103251458988, + "grad_norm": 45.26676559448242, + "learning_rate": 5e-06, + "loss": 1.1189, + "num_input_tokens_seen": 208350088, + "step": 1649 + }, + { + "epoch": 0.4230103251458988, + "loss": 1.1897099018096924, + "loss_ce": 0.00025679345708340406, + "loss_iou": 0.55078125, + "loss_num": 0.0179443359375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 208350088, + "step": 1649 + }, + { + "epoch": 0.42326685050984414, + "grad_norm": 83.2241439819336, + "learning_rate": 5e-06, + "loss": 1.18, + "num_input_tokens_seen": 208476332, + "step": 1650 + }, + { + "epoch": 0.42326685050984414, + "loss": 1.1875088214874268, + "loss_ce": 0.0009853194933384657, + "loss_iou": 0.55078125, + "loss_num": 0.0169677734375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 208476332, + "step": 1650 + }, + { + "epoch": 0.4235233758737895, + "grad_norm": 50.063880920410156, + "learning_rate": 5e-06, + "loss": 1.1067, + "num_input_tokens_seen": 208601848, + "step": 1651 + }, + { + "epoch": 0.4235233758737895, + "loss": 1.1624304056167603, + "loss_ce": 0.0012975574936717749, + "loss_iou": 0.53125, + "loss_num": 0.02001953125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 208601848, + "step": 1651 + }, + { + "epoch": 0.4237799012377349, + "grad_norm": 42.35184860229492, + "learning_rate": 5e-06, + "loss": 1.102, + "num_input_tokens_seen": 208728204, + "step": 1652 + }, + { + "epoch": 0.4237799012377349, + "loss": 1.1195818185806274, + "loss_ce": 0.0009294777410104871, + "loss_iou": 0.5234375, + "loss_num": 0.0147705078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 208728204, + "step": 1652 + }, + { + "epoch": 0.4240364266016802, + "grad_norm": 53.56521224975586, + "learning_rate": 5e-06, + "loss": 0.9967, + "num_input_tokens_seen": 208854624, + "step": 1653 + }, + { + "epoch": 0.4240364266016802, + "loss": 1.0427217483520508, + "loss_ce": 0.0017061267280951142, + "loss_iou": 0.490234375, + "loss_num": 0.0120849609375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 208854624, + "step": 1653 + }, + { + "epoch": 0.4242929519656256, + "grad_norm": 69.64403533935547, + "learning_rate": 5e-06, + "loss": 1.0525, + "num_input_tokens_seen": 208979824, + "step": 1654 + }, + { + "epoch": 0.4242929519656256, + "loss": 0.9538745284080505, + "loss_ce": 0.0002612921816762537, + "loss_iou": 0.451171875, + "loss_num": 0.010498046875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 208979824, + "step": 1654 + }, + { + "epoch": 0.42454947732957093, + "grad_norm": 47.09593200683594, + "learning_rate": 5e-06, + "loss": 1.0658, + "num_input_tokens_seen": 209107408, + "step": 1655 + }, + { + "epoch": 0.42454947732957093, + "loss": 1.039341688156128, + "loss_ce": 0.00027919039712287486, + "loss_iou": 0.46484375, + "loss_num": 0.0216064453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 209107408, + "step": 1655 + }, + { + "epoch": 0.4248060026935163, + "grad_norm": 55.104034423828125, + "learning_rate": 5e-06, + "loss": 1.1006, + "num_input_tokens_seen": 209233980, + "step": 1656 + }, + { + "epoch": 0.4248060026935163, + "loss": 0.9260310530662537, + "loss_ce": 0.0002498391841072589, + "loss_iou": 0.439453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 209233980, + "step": 1656 + }, + { + "epoch": 0.4250625280574617, + "grad_norm": 61.89091110229492, + "learning_rate": 5e-06, + "loss": 1.1101, + "num_input_tokens_seen": 209360496, + "step": 1657 + }, + { + "epoch": 0.4250625280574617, + "loss": 0.9135799407958984, + "loss_ce": 0.0009822794236242771, + "loss_iou": 0.44140625, + "loss_num": 0.005828857421875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 209360496, + "step": 1657 + }, + { + "epoch": 0.425319053421407, + "grad_norm": 75.11056518554688, + "learning_rate": 5e-06, + "loss": 1.1138, + "num_input_tokens_seen": 209486104, + "step": 1658 + }, + { + "epoch": 0.425319053421407, + "loss": 1.1328139305114746, + "loss_ce": 0.001466244924813509, + "loss_iou": 0.515625, + "loss_num": 0.0201416015625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 209486104, + "step": 1658 + }, + { + "epoch": 0.4255755787853524, + "grad_norm": 50.10881805419922, + "learning_rate": 5e-06, + "loss": 1.124, + "num_input_tokens_seen": 209612336, + "step": 1659 + }, + { + "epoch": 0.4255755787853524, + "loss": 1.0528364181518555, + "loss_ce": 0.000590332958381623, + "loss_iou": 0.49609375, + "loss_num": 0.0125732421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 209612336, + "step": 1659 + }, + { + "epoch": 0.4258321041492978, + "grad_norm": 50.55360794067383, + "learning_rate": 5e-06, + "loss": 1.0657, + "num_input_tokens_seen": 209738396, + "step": 1660 + }, + { + "epoch": 0.4258321041492978, + "loss": 1.0428487062454224, + "loss_ce": 0.0013447541277855635, + "loss_iou": 0.478515625, + "loss_num": 0.016845703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 209738396, + "step": 1660 + }, + { + "epoch": 0.4260886295132431, + "grad_norm": 49.765769958496094, + "learning_rate": 5e-06, + "loss": 1.2556, + "num_input_tokens_seen": 209865172, + "step": 1661 + }, + { + "epoch": 0.4260886295132431, + "loss": 1.2541368007659912, + "loss_ce": 0.002183671109378338, + "loss_iou": 0.578125, + "loss_num": 0.0189208984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 209865172, + "step": 1661 + }, + { + "epoch": 0.4263451548771885, + "grad_norm": 39.32392883300781, + "learning_rate": 5e-06, + "loss": 1.025, + "num_input_tokens_seen": 209990984, + "step": 1662 + }, + { + "epoch": 0.4263451548771885, + "loss": 1.0351297855377197, + "loss_ce": 0.0009500670130364597, + "loss_iou": 0.46875, + "loss_num": 0.01904296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 209990984, + "step": 1662 + }, + { + "epoch": 0.4266016802411338, + "grad_norm": 46.45328903198242, + "learning_rate": 5e-06, + "loss": 1.0134, + "num_input_tokens_seen": 210116624, + "step": 1663 + }, + { + "epoch": 0.4266016802411338, + "loss": 1.2170459032058716, + "loss_ce": 0.0022021338809281588, + "loss_iou": 0.54296875, + "loss_num": 0.0252685546875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 210116624, + "step": 1663 + }, + { + "epoch": 0.4268582056050792, + "grad_norm": 74.96824645996094, + "learning_rate": 5e-06, + "loss": 1.0088, + "num_input_tokens_seen": 210242424, + "step": 1664 + }, + { + "epoch": 0.4268582056050792, + "loss": 1.0115416049957275, + "loss_ce": 0.0012877867557108402, + "loss_iou": 0.466796875, + "loss_num": 0.01483154296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 210242424, + "step": 1664 + }, + { + "epoch": 0.4271147309690246, + "grad_norm": 59.330047607421875, + "learning_rate": 5e-06, + "loss": 1.2514, + "num_input_tokens_seen": 210369156, + "step": 1665 + }, + { + "epoch": 0.4271147309690246, + "loss": 1.4214725494384766, + "loss_ce": 0.001062413677573204, + "loss_iou": 0.6484375, + "loss_num": 0.0240478515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 210369156, + "step": 1665 + }, + { + "epoch": 0.4273712563329699, + "grad_norm": 21.97696876525879, + "learning_rate": 5e-06, + "loss": 1.089, + "num_input_tokens_seen": 210495756, + "step": 1666 + }, + { + "epoch": 0.4273712563329699, + "loss": 1.3233956098556519, + "loss_ce": 0.006012811791151762, + "loss_iou": 0.5859375, + "loss_num": 0.0296630859375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 210495756, + "step": 1666 + }, + { + "epoch": 0.4276277816969153, + "grad_norm": 27.928895950317383, + "learning_rate": 5e-06, + "loss": 1.0116, + "num_input_tokens_seen": 210621460, + "step": 1667 + }, + { + "epoch": 0.4276277816969153, + "loss": 0.9810816049575806, + "loss_ce": 0.0013452961575239897, + "loss_iou": 0.458984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 210621460, + "step": 1667 + }, + { + "epoch": 0.42788430706086067, + "grad_norm": 30.50233268737793, + "learning_rate": 5e-06, + "loss": 1.0934, + "num_input_tokens_seen": 210747352, + "step": 1668 + }, + { + "epoch": 0.42788430706086067, + "loss": 1.1623146533966064, + "loss_ce": 0.00020530365873128176, + "loss_iou": 0.54296875, + "loss_num": 0.01507568359375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 210747352, + "step": 1668 + }, + { + "epoch": 0.428140832424806, + "grad_norm": 37.15053939819336, + "learning_rate": 5e-06, + "loss": 1.0555, + "num_input_tokens_seen": 210873128, + "step": 1669 + }, + { + "epoch": 0.428140832424806, + "loss": 0.949683666229248, + "loss_ce": 0.0004648909962270409, + "loss_iou": 0.443359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 210873128, + "step": 1669 + }, + { + "epoch": 0.4283973577887514, + "grad_norm": 43.04761505126953, + "learning_rate": 5e-06, + "loss": 1.1937, + "num_input_tokens_seen": 210998712, + "step": 1670 + }, + { + "epoch": 0.4283973577887514, + "loss": 1.0563784837722778, + "loss_ce": 0.0007144762203097343, + "loss_iou": 0.494140625, + "loss_num": 0.0135498046875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 210998712, + "step": 1670 + }, + { + "epoch": 0.4286538831526967, + "grad_norm": 59.648155212402344, + "learning_rate": 5e-06, + "loss": 1.0378, + "num_input_tokens_seen": 211125456, + "step": 1671 + }, + { + "epoch": 0.4286538831526967, + "loss": 1.248727560043335, + "loss_ce": 0.0021456300746649504, + "loss_iou": 0.5625, + "loss_num": 0.0245361328125, + "loss_xval": 1.25, + "num_input_tokens_seen": 211125456, + "step": 1671 + }, + { + "epoch": 0.4289104085166421, + "grad_norm": 56.436737060546875, + "learning_rate": 5e-06, + "loss": 1.1041, + "num_input_tokens_seen": 211252004, + "step": 1672 + }, + { + "epoch": 0.4289104085166421, + "loss": 1.1503679752349854, + "loss_ce": 0.0019303737208247185, + "loss_iou": 0.5234375, + "loss_num": 0.020263671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 211252004, + "step": 1672 + }, + { + "epoch": 0.42916693388058746, + "grad_norm": 71.11824035644531, + "learning_rate": 5e-06, + "loss": 1.1049, + "num_input_tokens_seen": 211378052, + "step": 1673 + }, + { + "epoch": 0.42916693388058746, + "loss": 0.8712977170944214, + "loss_ce": 0.0021570592653006315, + "loss_iou": 0.419921875, + "loss_num": 0.006134033203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 211378052, + "step": 1673 + }, + { + "epoch": 0.4294234592445328, + "grad_norm": 52.73110580444336, + "learning_rate": 5e-06, + "loss": 1.1802, + "num_input_tokens_seen": 211503648, + "step": 1674 + }, + { + "epoch": 0.4294234592445328, + "loss": 1.173094630241394, + "loss_ce": 0.0021962355822324753, + "loss_iou": 0.5546875, + "loss_num": 0.0120849609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 211503648, + "step": 1674 + }, + { + "epoch": 0.42967998460847817, + "grad_norm": 27.168691635131836, + "learning_rate": 5e-06, + "loss": 0.978, + "num_input_tokens_seen": 211630584, + "step": 1675 + }, + { + "epoch": 0.42967998460847817, + "loss": 0.7920674681663513, + "loss_ce": 0.0005635780980810523, + "loss_iou": 0.37890625, + "loss_num": 0.00665283203125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 211630584, + "step": 1675 + }, + { + "epoch": 0.4299365099724235, + "grad_norm": 35.89946365356445, + "learning_rate": 5e-06, + "loss": 1.0427, + "num_input_tokens_seen": 211755400, + "step": 1676 + }, + { + "epoch": 0.4299365099724235, + "loss": 0.9957920908927917, + "loss_ce": 0.00018661384820006788, + "loss_iou": 0.46875, + "loss_num": 0.01171875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 211755400, + "step": 1676 + }, + { + "epoch": 0.4301930353363689, + "grad_norm": 73.55236053466797, + "learning_rate": 5e-06, + "loss": 1.0297, + "num_input_tokens_seen": 211881792, + "step": 1677 + }, + { + "epoch": 0.4301930353363689, + "loss": 0.9825071096420288, + "loss_ce": 0.0005735284648835659, + "loss_iou": 0.45703125, + "loss_num": 0.01361083984375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 211881792, + "step": 1677 + }, + { + "epoch": 0.43044956070031426, + "grad_norm": 43.15802764892578, + "learning_rate": 5e-06, + "loss": 1.258, + "num_input_tokens_seen": 212007792, + "step": 1678 + }, + { + "epoch": 0.43044956070031426, + "loss": 1.1792137622833252, + "loss_ce": 0.003432475496083498, + "loss_iou": 0.546875, + "loss_num": 0.0166015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 212007792, + "step": 1678 + }, + { + "epoch": 0.4307060860642596, + "grad_norm": 37.76390838623047, + "learning_rate": 5e-06, + "loss": 1.0209, + "num_input_tokens_seen": 212134384, + "step": 1679 + }, + { + "epoch": 0.4307060860642596, + "loss": 0.923201322555542, + "loss_ce": 0.0005939488764852285, + "loss_iou": 0.427734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 212134384, + "step": 1679 + }, + { + "epoch": 0.43096261142820497, + "grad_norm": 59.22579574584961, + "learning_rate": 5e-06, + "loss": 1.0173, + "num_input_tokens_seen": 212260488, + "step": 1680 + }, + { + "epoch": 0.43096261142820497, + "loss": 0.9751040935516357, + "loss_ce": 0.0009829895570874214, + "loss_iou": 0.46484375, + "loss_num": 0.00921630859375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 212260488, + "step": 1680 + }, + { + "epoch": 0.43121913679215035, + "grad_norm": 76.30946350097656, + "learning_rate": 5e-06, + "loss": 1.2586, + "num_input_tokens_seen": 212386728, + "step": 1681 + }, + { + "epoch": 0.43121913679215035, + "loss": 0.9621812105178833, + "loss_ce": 0.0005112984217703342, + "loss_iou": 0.4609375, + "loss_num": 0.00799560546875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 212386728, + "step": 1681 + }, + { + "epoch": 0.4314756621560957, + "grad_norm": 59.16542053222656, + "learning_rate": 5e-06, + "loss": 1.0156, + "num_input_tokens_seen": 212511524, + "step": 1682 + }, + { + "epoch": 0.4314756621560957, + "loss": 0.8259526491165161, + "loss_ce": 0.0002690640976652503, + "loss_iou": 0.39453125, + "loss_num": 0.00714111328125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 212511524, + "step": 1682 + }, + { + "epoch": 0.43173218752004106, + "grad_norm": 54.78873825073242, + "learning_rate": 5e-06, + "loss": 0.968, + "num_input_tokens_seen": 212638756, + "step": 1683 + }, + { + "epoch": 0.43173218752004106, + "loss": 1.0525240898132324, + "loss_ce": 0.002231112215667963, + "loss_iou": 0.486328125, + "loss_num": 0.01531982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 212638756, + "step": 1683 + }, + { + "epoch": 0.4319887128839864, + "grad_norm": 43.38678741455078, + "learning_rate": 5e-06, + "loss": 1.1246, + "num_input_tokens_seen": 212765016, + "step": 1684 + }, + { + "epoch": 0.4319887128839864, + "loss": 1.2292381525039673, + "loss_ce": 0.0007225493900477886, + "loss_iou": 0.5625, + "loss_num": 0.0208740234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 212765016, + "step": 1684 + }, + { + "epoch": 0.43224523824793176, + "grad_norm": 60.852508544921875, + "learning_rate": 5e-06, + "loss": 1.1452, + "num_input_tokens_seen": 212891064, + "step": 1685 + }, + { + "epoch": 0.43224523824793176, + "loss": 1.0859408378601074, + "loss_ce": 0.003909580875188112, + "loss_iou": 0.50390625, + "loss_num": 0.0142822265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 212891064, + "step": 1685 + }, + { + "epoch": 0.43250176361187714, + "grad_norm": 43.29603576660156, + "learning_rate": 5e-06, + "loss": 1.2806, + "num_input_tokens_seen": 213016908, + "step": 1686 + }, + { + "epoch": 0.43250176361187714, + "loss": 1.3492157459259033, + "loss_ce": 0.0015595202567055821, + "loss_iou": 0.61328125, + "loss_num": 0.023681640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 213016908, + "step": 1686 + }, + { + "epoch": 0.43275828897582247, + "grad_norm": 39.058982849121094, + "learning_rate": 5e-06, + "loss": 0.9249, + "num_input_tokens_seen": 213142960, + "step": 1687 + }, + { + "epoch": 0.43275828897582247, + "loss": 0.8114954829216003, + "loss_ce": 0.00046032428508624434, + "loss_iou": 0.38671875, + "loss_num": 0.007537841796875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 213142960, + "step": 1687 + }, + { + "epoch": 0.43301481433976785, + "grad_norm": 37.85932159423828, + "learning_rate": 5e-06, + "loss": 1.1708, + "num_input_tokens_seen": 213268476, + "step": 1688 + }, + { + "epoch": 0.43301481433976785, + "loss": 1.3028829097747803, + "loss_ce": 0.0035665498580783606, + "loss_iou": 0.578125, + "loss_num": 0.027587890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 213268476, + "step": 1688 + }, + { + "epoch": 0.43327133970371323, + "grad_norm": 48.668487548828125, + "learning_rate": 5e-06, + "loss": 1.0394, + "num_input_tokens_seen": 213395764, + "step": 1689 + }, + { + "epoch": 0.43327133970371323, + "loss": 1.1256420612335205, + "loss_ce": 0.0006420727004297078, + "loss_iou": 0.51171875, + "loss_num": 0.0203857421875, + "loss_xval": 1.125, + "num_input_tokens_seen": 213395764, + "step": 1689 + }, + { + "epoch": 0.43352786506765856, + "grad_norm": 42.9271240234375, + "learning_rate": 5e-06, + "loss": 1.0501, + "num_input_tokens_seen": 213522828, + "step": 1690 + }, + { + "epoch": 0.43352786506765856, + "loss": 1.0162036418914795, + "loss_ce": 0.000578532402869314, + "loss_iou": 0.478515625, + "loss_num": 0.01190185546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 213522828, + "step": 1690 + }, + { + "epoch": 0.43378439043160394, + "grad_norm": 64.90715026855469, + "learning_rate": 5e-06, + "loss": 1.0359, + "num_input_tokens_seen": 213650352, + "step": 1691 + }, + { + "epoch": 0.43378439043160394, + "loss": 1.0330381393432617, + "loss_ce": 0.0012999402824789286, + "loss_iou": 0.482421875, + "loss_num": 0.01312255859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 213650352, + "step": 1691 + }, + { + "epoch": 0.43404091579554926, + "grad_norm": 46.68683624267578, + "learning_rate": 5e-06, + "loss": 1.2712, + "num_input_tokens_seen": 213776636, + "step": 1692 + }, + { + "epoch": 0.43404091579554926, + "loss": 1.0397357940673828, + "loss_ce": 0.0006732118199579418, + "loss_iou": 0.49609375, + "loss_num": 0.0091552734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 213776636, + "step": 1692 + }, + { + "epoch": 0.43429744115949465, + "grad_norm": 35.813636779785156, + "learning_rate": 5e-06, + "loss": 1.0869, + "num_input_tokens_seen": 213902904, + "step": 1693 + }, + { + "epoch": 0.43429744115949465, + "loss": 1.4020848274230957, + "loss_ce": 0.0007176260696724057, + "loss_iou": 0.640625, + "loss_num": 0.0238037109375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 213902904, + "step": 1693 + }, + { + "epoch": 0.43455396652344, + "grad_norm": 60.409934997558594, + "learning_rate": 5e-06, + "loss": 1.0868, + "num_input_tokens_seen": 214028792, + "step": 1694 + }, + { + "epoch": 0.43455396652344, + "loss": 1.0841947793960571, + "loss_ce": 0.0041166553273797035, + "loss_iou": 0.490234375, + "loss_num": 0.0198974609375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 214028792, + "step": 1694 + }, + { + "epoch": 0.43481049188738535, + "grad_norm": 44.60717010498047, + "learning_rate": 5e-06, + "loss": 1.1302, + "num_input_tokens_seen": 214154564, + "step": 1695 + }, + { + "epoch": 0.43481049188738535, + "loss": 0.9172141551971436, + "loss_ce": 0.001686788396909833, + "loss_iou": 0.431640625, + "loss_num": 0.01019287109375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 214154564, + "step": 1695 + }, + { + "epoch": 0.43506701725133073, + "grad_norm": 33.89347457885742, + "learning_rate": 5e-06, + "loss": 1.028, + "num_input_tokens_seen": 214281148, + "step": 1696 + }, + { + "epoch": 0.43506701725133073, + "loss": 0.8942193984985352, + "loss_ce": 0.0004205804434604943, + "loss_iou": 0.4140625, + "loss_num": 0.01312255859375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 214281148, + "step": 1696 + }, + { + "epoch": 0.43532354261527606, + "grad_norm": 36.94711685180664, + "learning_rate": 5e-06, + "loss": 1.0375, + "num_input_tokens_seen": 214408464, + "step": 1697 + }, + { + "epoch": 0.43532354261527606, + "loss": 1.127626895904541, + "loss_ce": 0.0011620090808719397, + "loss_iou": 0.5234375, + "loss_num": 0.0167236328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 214408464, + "step": 1697 + }, + { + "epoch": 0.43558006797922144, + "grad_norm": 51.89728546142578, + "learning_rate": 5e-06, + "loss": 1.0329, + "num_input_tokens_seen": 214534768, + "step": 1698 + }, + { + "epoch": 0.43558006797922144, + "loss": 1.0764617919921875, + "loss_ce": 0.001266501029022038, + "loss_iou": 0.48828125, + "loss_num": 0.019775390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 214534768, + "step": 1698 + }, + { + "epoch": 0.4358365933431668, + "grad_norm": 56.97518539428711, + "learning_rate": 5e-06, + "loss": 1.091, + "num_input_tokens_seen": 214661080, + "step": 1699 + }, + { + "epoch": 0.4358365933431668, + "loss": 1.1828217506408691, + "loss_ce": 0.00850541889667511, + "loss_iou": 0.53125, + "loss_num": 0.0223388671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 214661080, + "step": 1699 + }, + { + "epoch": 0.43609311870711215, + "grad_norm": 66.85093688964844, + "learning_rate": 5e-06, + "loss": 0.9973, + "num_input_tokens_seen": 214787764, + "step": 1700 + }, + { + "epoch": 0.43609311870711215, + "loss": 1.0678298473358154, + "loss_ce": 0.0028884424827992916, + "loss_iou": 0.49609375, + "loss_num": 0.014404296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 214787764, + "step": 1700 + }, + { + "epoch": 0.43634964407105753, + "grad_norm": 61.146766662597656, + "learning_rate": 5e-06, + "loss": 1.1892, + "num_input_tokens_seen": 214914876, + "step": 1701 + }, + { + "epoch": 0.43634964407105753, + "loss": 1.1562916040420532, + "loss_ce": 0.0049244724214077, + "loss_iou": 0.5390625, + "loss_num": 0.01385498046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 214914876, + "step": 1701 + }, + { + "epoch": 0.4366061694350029, + "grad_norm": 74.96965026855469, + "learning_rate": 5e-06, + "loss": 1.0846, + "num_input_tokens_seen": 215041840, + "step": 1702 + }, + { + "epoch": 0.4366061694350029, + "loss": 1.153557538986206, + "loss_ce": 0.00023729816894046962, + "loss_iou": 0.5234375, + "loss_num": 0.0213623046875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 215041840, + "step": 1702 + }, + { + "epoch": 0.43686269479894824, + "grad_norm": 49.68824768066406, + "learning_rate": 5e-06, + "loss": 1.1794, + "num_input_tokens_seen": 215167568, + "step": 1703 + }, + { + "epoch": 0.43686269479894824, + "loss": 1.2320411205291748, + "loss_ce": 0.0005958082620054483, + "loss_iou": 0.56640625, + "loss_num": 0.0191650390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 215167568, + "step": 1703 + }, + { + "epoch": 0.4371192201628936, + "grad_norm": 28.13874053955078, + "learning_rate": 5e-06, + "loss": 1.0946, + "num_input_tokens_seen": 215293568, + "step": 1704 + }, + { + "epoch": 0.4371192201628936, + "loss": 1.2300231456756592, + "loss_ce": 0.0029722554609179497, + "loss_iou": 0.546875, + "loss_num": 0.0269775390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 215293568, + "step": 1704 + }, + { + "epoch": 0.43737574552683894, + "grad_norm": 43.31360626220703, + "learning_rate": 5e-06, + "loss": 1.0783, + "num_input_tokens_seen": 215419708, + "step": 1705 + }, + { + "epoch": 0.43737574552683894, + "loss": 1.0617132186889648, + "loss_ce": 0.005560874938964844, + "loss_iou": 0.453125, + "loss_num": 0.0303955078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 215419708, + "step": 1705 + }, + { + "epoch": 0.4376322708907843, + "grad_norm": 47.878501892089844, + "learning_rate": 5e-06, + "loss": 1.1204, + "num_input_tokens_seen": 215545032, + "step": 1706 + }, + { + "epoch": 0.4376322708907843, + "loss": 1.117936611175537, + "loss_ce": 0.0017257253639400005, + "loss_iou": 0.50390625, + "loss_num": 0.021484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 215545032, + "step": 1706 + }, + { + "epoch": 0.4378887962547297, + "grad_norm": 33.08382034301758, + "learning_rate": 5e-06, + "loss": 1.0077, + "num_input_tokens_seen": 215670804, + "step": 1707 + }, + { + "epoch": 0.4378887962547297, + "loss": 1.241870641708374, + "loss_ce": 0.004321817308664322, + "loss_iou": 0.55078125, + "loss_num": 0.0267333984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 215670804, + "step": 1707 + }, + { + "epoch": 0.43814532161867503, + "grad_norm": 60.26030731201172, + "learning_rate": 5e-06, + "loss": 1.1039, + "num_input_tokens_seen": 215797588, + "step": 1708 + }, + { + "epoch": 0.43814532161867503, + "loss": 1.2055153846740723, + "loss_ce": 0.0038552528712898493, + "loss_iou": 0.54296875, + "loss_num": 0.0224609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 215797588, + "step": 1708 + }, + { + "epoch": 0.4384018469826204, + "grad_norm": 99.734375, + "learning_rate": 5e-06, + "loss": 1.1371, + "num_input_tokens_seen": 215924552, + "step": 1709 + }, + { + "epoch": 0.4384018469826204, + "loss": 1.1549296379089355, + "loss_ce": 0.0006327689625322819, + "loss_iou": 0.5390625, + "loss_num": 0.0146484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 215924552, + "step": 1709 + }, + { + "epoch": 0.43865837234656574, + "grad_norm": 50.84249496459961, + "learning_rate": 5e-06, + "loss": 1.1741, + "num_input_tokens_seen": 216050496, + "step": 1710 + }, + { + "epoch": 0.43865837234656574, + "loss": 1.2726256847381592, + "loss_ce": 0.003094491781666875, + "loss_iou": 0.57421875, + "loss_num": 0.0242919921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 216050496, + "step": 1710 + }, + { + "epoch": 0.4389148977105111, + "grad_norm": 49.006404876708984, + "learning_rate": 5e-06, + "loss": 1.0052, + "num_input_tokens_seen": 216176652, + "step": 1711 + }, + { + "epoch": 0.4389148977105111, + "loss": 0.8756401538848877, + "loss_ce": 0.00039604902849532664, + "loss_iou": 0.408203125, + "loss_num": 0.01141357421875, + "loss_xval": 0.875, + "num_input_tokens_seen": 216176652, + "step": 1711 + }, + { + "epoch": 0.4391714230744565, + "grad_norm": 57.28960418701172, + "learning_rate": 5e-06, + "loss": 1.1369, + "num_input_tokens_seen": 216303056, + "step": 1712 + }, + { + "epoch": 0.4391714230744565, + "loss": 1.0891642570495605, + "loss_ce": 0.00029716239077970386, + "loss_iou": 0.51171875, + "loss_num": 0.0130615234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 216303056, + "step": 1712 + }, + { + "epoch": 0.4394279484384018, + "grad_norm": 41.549110412597656, + "learning_rate": 5e-06, + "loss": 1.1071, + "num_input_tokens_seen": 216430404, + "step": 1713 + }, + { + "epoch": 0.4394279484384018, + "loss": 1.3964653015136719, + "loss_ce": 0.0009574639843776822, + "loss_iou": 0.625, + "loss_num": 0.028564453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 216430404, + "step": 1713 + }, + { + "epoch": 0.4396844738023472, + "grad_norm": 38.457977294921875, + "learning_rate": 5e-06, + "loss": 1.0112, + "num_input_tokens_seen": 216555976, + "step": 1714 + }, + { + "epoch": 0.4396844738023472, + "loss": 1.184187412261963, + "loss_ce": 0.0010819791350513697, + "loss_iou": 0.52734375, + "loss_num": 0.0252685546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 216555976, + "step": 1714 + }, + { + "epoch": 0.4399409991662926, + "grad_norm": 55.67451095581055, + "learning_rate": 5e-06, + "loss": 1.0446, + "num_input_tokens_seen": 216682616, + "step": 1715 + }, + { + "epoch": 0.4399409991662926, + "loss": 1.0428118705749512, + "loss_ce": 0.00033150549279525876, + "loss_iou": 0.4765625, + "loss_num": 0.017822265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 216682616, + "step": 1715 + }, + { + "epoch": 0.4401975245302379, + "grad_norm": 72.97443389892578, + "learning_rate": 5e-06, + "loss": 1.0097, + "num_input_tokens_seen": 216809512, + "step": 1716 + }, + { + "epoch": 0.4401975245302379, + "loss": 1.1563479900360107, + "loss_ce": 0.0030277553014457226, + "loss_iou": 0.53515625, + "loss_num": 0.0164794921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 216809512, + "step": 1716 + }, + { + "epoch": 0.4404540498941833, + "grad_norm": 59.25205993652344, + "learning_rate": 5e-06, + "loss": 1.117, + "num_input_tokens_seen": 216936232, + "step": 1717 + }, + { + "epoch": 0.4404540498941833, + "loss": 1.1371543407440186, + "loss_ce": 0.004830121994018555, + "loss_iou": 0.51953125, + "loss_num": 0.019287109375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 216936232, + "step": 1717 + }, + { + "epoch": 0.4407105752581286, + "grad_norm": 45.468631744384766, + "learning_rate": 5e-06, + "loss": 1.1769, + "num_input_tokens_seen": 217062324, + "step": 1718 + }, + { + "epoch": 0.4407105752581286, + "loss": 1.1025447845458984, + "loss_ce": 0.0004939006757922471, + "loss_iou": 0.51953125, + "loss_num": 0.01336669921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 217062324, + "step": 1718 + }, + { + "epoch": 0.440967100622074, + "grad_norm": 50.48375701904297, + "learning_rate": 5e-06, + "loss": 1.2932, + "num_input_tokens_seen": 217188588, + "step": 1719 + }, + { + "epoch": 0.440967100622074, + "loss": 1.0813567638397217, + "loss_ce": 0.0017669497756287456, + "loss_iou": 0.49609375, + "loss_num": 0.01708984375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 217188588, + "step": 1719 + }, + { + "epoch": 0.4412236259860194, + "grad_norm": 66.76935577392578, + "learning_rate": 5e-06, + "loss": 1.129, + "num_input_tokens_seen": 217315184, + "step": 1720 + }, + { + "epoch": 0.4412236259860194, + "loss": 1.1760069131851196, + "loss_ce": 0.002667068038135767, + "loss_iou": 0.53125, + "loss_num": 0.021484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 217315184, + "step": 1720 + }, + { + "epoch": 0.4414801513499647, + "grad_norm": 46.163448333740234, + "learning_rate": 5e-06, + "loss": 1.0255, + "num_input_tokens_seen": 217440320, + "step": 1721 + }, + { + "epoch": 0.4414801513499647, + "loss": 1.0357567071914673, + "loss_ce": 0.004018500447273254, + "loss_iou": 0.46484375, + "loss_num": 0.0206298828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 217440320, + "step": 1721 + }, + { + "epoch": 0.4417366767139101, + "grad_norm": 28.159130096435547, + "learning_rate": 5e-06, + "loss": 1.1231, + "num_input_tokens_seen": 217566980, + "step": 1722 + }, + { + "epoch": 0.4417366767139101, + "loss": 1.2694045305252075, + "loss_ce": 0.0032912425231188536, + "loss_iou": 0.5703125, + "loss_num": 0.0257568359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 217566980, + "step": 1722 + }, + { + "epoch": 0.4419932020778555, + "grad_norm": 25.083528518676758, + "learning_rate": 5e-06, + "loss": 0.9489, + "num_input_tokens_seen": 217692868, + "step": 1723 + }, + { + "epoch": 0.4419932020778555, + "loss": 0.9106738567352295, + "loss_ce": 0.0014941783156245947, + "loss_iou": 0.427734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 217692868, + "step": 1723 + }, + { + "epoch": 0.4422497274418008, + "grad_norm": 32.0662841796875, + "learning_rate": 5e-06, + "loss": 1.0154, + "num_input_tokens_seen": 217819496, + "step": 1724 + }, + { + "epoch": 0.4422497274418008, + "loss": 0.8857411742210388, + "loss_ce": 0.00048729853006079793, + "loss_iou": 0.42578125, + "loss_num": 0.006683349609375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 217819496, + "step": 1724 + }, + { + "epoch": 0.4425062528057462, + "grad_norm": 38.85762405395508, + "learning_rate": 5e-06, + "loss": 1.0428, + "num_input_tokens_seen": 217945548, + "step": 1725 + }, + { + "epoch": 0.4425062528057462, + "loss": 0.9111621379852295, + "loss_ce": 0.0029589852783828974, + "loss_iou": 0.42578125, + "loss_num": 0.0108642578125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 217945548, + "step": 1725 + }, + { + "epoch": 0.4427627781696915, + "grad_norm": 53.181636810302734, + "learning_rate": 5e-06, + "loss": 1.1949, + "num_input_tokens_seen": 218072168, + "step": 1726 + }, + { + "epoch": 0.4427627781696915, + "loss": 1.2823820114135742, + "loss_ce": 0.003085160395130515, + "loss_iou": 0.56640625, + "loss_num": 0.029296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 218072168, + "step": 1726 + }, + { + "epoch": 0.4430193035336369, + "grad_norm": 47.58131408691406, + "learning_rate": 5e-06, + "loss": 1.0586, + "num_input_tokens_seen": 218199204, + "step": 1727 + }, + { + "epoch": 0.4430193035336369, + "loss": 1.1768717765808105, + "loss_ce": 0.002067042514681816, + "loss_iou": 0.53515625, + "loss_num": 0.0211181640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 218199204, + "step": 1727 + }, + { + "epoch": 0.44327582889758227, + "grad_norm": 52.15944290161133, + "learning_rate": 5e-06, + "loss": 0.9872, + "num_input_tokens_seen": 218324740, + "step": 1728 + }, + { + "epoch": 0.44327582889758227, + "loss": 1.1255285739898682, + "loss_ce": 0.0010168793378397822, + "loss_iou": 0.4921875, + "loss_num": 0.027587890625, + "loss_xval": 1.125, + "num_input_tokens_seen": 218324740, + "step": 1728 + }, + { + "epoch": 0.4435323542615276, + "grad_norm": 69.7750244140625, + "learning_rate": 5e-06, + "loss": 1.1588, + "num_input_tokens_seen": 218451404, + "step": 1729 + }, + { + "epoch": 0.4435323542615276, + "loss": 1.0207029581069946, + "loss_ce": 0.0006835001986473799, + "loss_iou": 0.486328125, + "loss_num": 0.00927734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 218451404, + "step": 1729 + }, + { + "epoch": 0.443788879625473, + "grad_norm": 81.23307800292969, + "learning_rate": 5e-06, + "loss": 1.1839, + "num_input_tokens_seen": 218578104, + "step": 1730 + }, + { + "epoch": 0.443788879625473, + "loss": 0.964625358581543, + "loss_ce": 0.00026984367286786437, + "loss_iou": 0.443359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 218578104, + "step": 1730 + }, + { + "epoch": 0.4440454049894183, + "grad_norm": 70.81971740722656, + "learning_rate": 5e-06, + "loss": 1.2513, + "num_input_tokens_seen": 218703752, + "step": 1731 + }, + { + "epoch": 0.4440454049894183, + "loss": 1.2160639762878418, + "loss_ce": 0.0017085422296077013, + "loss_iou": 0.56640625, + "loss_num": 0.0159912109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 218703752, + "step": 1731 + }, + { + "epoch": 0.4443019303533637, + "grad_norm": 41.94395065307617, + "learning_rate": 5e-06, + "loss": 0.9854, + "num_input_tokens_seen": 218831260, + "step": 1732 + }, + { + "epoch": 0.4443019303533637, + "loss": 0.863932728767395, + "loss_ce": 0.00016315290122292936, + "loss_iou": 0.4140625, + "loss_num": 0.007537841796875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 218831260, + "step": 1732 + }, + { + "epoch": 0.44455845571730906, + "grad_norm": 52.87050247192383, + "learning_rate": 5e-06, + "loss": 1.0794, + "num_input_tokens_seen": 218956900, + "step": 1733 + }, + { + "epoch": 0.44455845571730906, + "loss": 1.021039366722107, + "loss_ce": 0.002484695753082633, + "loss_iou": 0.4765625, + "loss_num": 0.01336669921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 218956900, + "step": 1733 + }, + { + "epoch": 0.4448149810812544, + "grad_norm": 77.93696594238281, + "learning_rate": 5e-06, + "loss": 1.048, + "num_input_tokens_seen": 219082204, + "step": 1734 + }, + { + "epoch": 0.4448149810812544, + "loss": 1.1230113506317139, + "loss_ce": 0.002405996434390545, + "loss_iou": 0.51953125, + "loss_num": 0.0162353515625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 219082204, + "step": 1734 + }, + { + "epoch": 0.44507150644519977, + "grad_norm": 51.49489212036133, + "learning_rate": 5e-06, + "loss": 1.1849, + "num_input_tokens_seen": 219207320, + "step": 1735 + }, + { + "epoch": 0.44507150644519977, + "loss": 1.182167410850525, + "loss_ce": 0.0015033646486699581, + "loss_iou": 0.54296875, + "loss_num": 0.01904296875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 219207320, + "step": 1735 + }, + { + "epoch": 0.44532803180914515, + "grad_norm": 64.10763549804688, + "learning_rate": 5e-06, + "loss": 1.0117, + "num_input_tokens_seen": 219334420, + "step": 1736 + }, + { + "epoch": 0.44532803180914515, + "loss": 1.1375796794891357, + "loss_ce": 0.001349158468656242, + "loss_iou": 0.5234375, + "loss_num": 0.0172119140625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 219334420, + "step": 1736 + }, + { + "epoch": 0.4455845571730905, + "grad_norm": 60.665958404541016, + "learning_rate": 5e-06, + "loss": 1.1388, + "num_input_tokens_seen": 219460968, + "step": 1737 + }, + { + "epoch": 0.4455845571730905, + "loss": 1.2081689834594727, + "loss_ce": 0.0011378065682947636, + "loss_iou": 0.5625, + "loss_num": 0.0172119140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 219460968, + "step": 1737 + }, + { + "epoch": 0.44584108253703586, + "grad_norm": 51.04098129272461, + "learning_rate": 5e-06, + "loss": 1.1346, + "num_input_tokens_seen": 219587608, + "step": 1738 + }, + { + "epoch": 0.44584108253703586, + "loss": 1.051917552947998, + "loss_ce": 0.001380457542836666, + "loss_iou": 0.486328125, + "loss_num": 0.0152587890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 219587608, + "step": 1738 + }, + { + "epoch": 0.4460976079009812, + "grad_norm": 39.799922943115234, + "learning_rate": 5e-06, + "loss": 0.9987, + "num_input_tokens_seen": 219713428, + "step": 1739 + }, + { + "epoch": 0.4460976079009812, + "loss": 0.918642520904541, + "loss_ce": 0.005068228580057621, + "loss_iou": 0.421875, + "loss_num": 0.0137939453125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 219713428, + "step": 1739 + }, + { + "epoch": 0.44635413326492657, + "grad_norm": 49.80137634277344, + "learning_rate": 5e-06, + "loss": 1.0599, + "num_input_tokens_seen": 219839820, + "step": 1740 + }, + { + "epoch": 0.44635413326492657, + "loss": 1.207512617111206, + "loss_ce": 0.0024344762787222862, + "loss_iou": 0.52734375, + "loss_num": 0.030517578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 219839820, + "step": 1740 + }, + { + "epoch": 0.44661065862887195, + "grad_norm": 68.66104125976562, + "learning_rate": 5e-06, + "loss": 1.0696, + "num_input_tokens_seen": 219966636, + "step": 1741 + }, + { + "epoch": 0.44661065862887195, + "loss": 1.0989265441894531, + "loss_ce": 0.00322341313585639, + "loss_iou": 0.498046875, + "loss_num": 0.019775390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 219966636, + "step": 1741 + }, + { + "epoch": 0.4468671839928173, + "grad_norm": 58.057106018066406, + "learning_rate": 5e-06, + "loss": 1.0122, + "num_input_tokens_seen": 220092768, + "step": 1742 + }, + { + "epoch": 0.4468671839928173, + "loss": 1.058262825012207, + "loss_ce": 0.0011339513584971428, + "loss_iou": 0.490234375, + "loss_num": 0.0155029296875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 220092768, + "step": 1742 + }, + { + "epoch": 0.44712370935676266, + "grad_norm": 54.936180114746094, + "learning_rate": 5e-06, + "loss": 1.0755, + "num_input_tokens_seen": 220218764, + "step": 1743 + }, + { + "epoch": 0.44712370935676266, + "loss": 1.2308447360992432, + "loss_ce": 0.000864313100464642, + "loss_iou": 0.54296875, + "loss_num": 0.0279541015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 220218764, + "step": 1743 + }, + { + "epoch": 0.44738023472070804, + "grad_norm": 54.46271896362305, + "learning_rate": 5e-06, + "loss": 1.2558, + "num_input_tokens_seen": 220345436, + "step": 1744 + }, + { + "epoch": 0.44738023472070804, + "loss": 1.337061882019043, + "loss_ce": 0.001612723572179675, + "loss_iou": 0.59375, + "loss_num": 0.029541015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 220345436, + "step": 1744 + }, + { + "epoch": 0.44763676008465336, + "grad_norm": 74.26089477539062, + "learning_rate": 5e-06, + "loss": 0.9805, + "num_input_tokens_seen": 220471580, + "step": 1745 + }, + { + "epoch": 0.44763676008465336, + "loss": 0.9207468628883362, + "loss_ce": 0.0013132905587553978, + "loss_iou": 0.4375, + "loss_num": 0.0086669921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 220471580, + "step": 1745 + }, + { + "epoch": 0.44789328544859874, + "grad_norm": 48.16965866088867, + "learning_rate": 5e-06, + "loss": 1.2025, + "num_input_tokens_seen": 220596952, + "step": 1746 + }, + { + "epoch": 0.44789328544859874, + "loss": 1.2162046432495117, + "loss_ce": 0.0003843932645395398, + "loss_iou": 0.56640625, + "loss_num": 0.0166015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 220596952, + "step": 1746 + }, + { + "epoch": 0.44814981081254407, + "grad_norm": 45.7118034362793, + "learning_rate": 5e-06, + "loss": 1.0403, + "num_input_tokens_seen": 220723320, + "step": 1747 + }, + { + "epoch": 0.44814981081254407, + "loss": 1.120827078819275, + "loss_ce": 0.00022159266518428922, + "loss_iou": 0.51953125, + "loss_num": 0.0157470703125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 220723320, + "step": 1747 + }, + { + "epoch": 0.44840633617648945, + "grad_norm": 85.08150482177734, + "learning_rate": 5e-06, + "loss": 1.0256, + "num_input_tokens_seen": 220849444, + "step": 1748 + }, + { + "epoch": 0.44840633617648945, + "loss": 0.9492961764335632, + "loss_ce": 0.0020305525977164507, + "loss_iou": 0.451171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 220849444, + "step": 1748 + }, + { + "epoch": 0.44866286154043483, + "grad_norm": 48.142723083496094, + "learning_rate": 5e-06, + "loss": 1.2591, + "num_input_tokens_seen": 220975668, + "step": 1749 + }, + { + "epoch": 0.44866286154043483, + "loss": 1.2303417921066284, + "loss_ce": 0.0013378707226365805, + "loss_iou": 0.5859375, + "loss_num": 0.01153564453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 220975668, + "step": 1749 + }, + { + "epoch": 0.44891938690438016, + "grad_norm": 33.37443161010742, + "learning_rate": 5e-06, + "loss": 0.9492, + "num_input_tokens_seen": 221101092, + "step": 1750 + }, + { + "epoch": 0.44891938690438016, + "eval_icons_CIoU": 0.13520523346960545, + "eval_icons_GIoU": 0.09360607946291566, + "eval_icons_IoU": 0.33239367604255676, + "eval_icons_MAE_all": 0.03282083850353956, + "eval_icons_MAE_h": 0.05826394818723202, + "eval_icons_MAE_w": 0.05559726431965828, + "eval_icons_MAE_x_boxes": 0.054289765655994415, + "eval_icons_MAE_y_boxes": 0.058288367465138435, + "eval_icons_NUM_probability": 0.9995181262493134, + "eval_icons_inside_bbox": 0.5659722238779068, + "eval_icons_loss": 1.9127000570297241, + "eval_icons_loss_ce": 0.0001715321996016428, + "eval_icons_loss_iou": 0.87646484375, + "eval_icons_loss_num": 0.036895751953125, + "eval_icons_loss_xval": 1.93798828125, + "eval_icons_runtime": 63.7316, + "eval_icons_samples_per_second": 0.785, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 221101092, + "step": 1750 + }, + { + "epoch": 0.44891938690438016, + "eval_screenspot_CIoU": 0.13932100435098013, + "eval_screenspot_GIoU": 0.12832056730985641, + "eval_screenspot_IoU": 0.30554496745268506, + "eval_screenspot_MAE_all": 0.08185822144150734, + "eval_screenspot_MAE_h": 0.06552359213431676, + "eval_screenspot_MAE_w": 0.13179517289002737, + "eval_screenspot_MAE_x_boxes": 0.10988386223713557, + "eval_screenspot_MAE_y_boxes": 0.059355118622382484, + "eval_screenspot_NUM_probability": 0.9998281002044678, + "eval_screenspot_inside_bbox": 0.6358333428700765, + "eval_screenspot_loss": 2.2082271575927734, + "eval_screenspot_loss_ce": 0.0019252800848335028, + "eval_screenspot_loss_iou": 0.907470703125, + "eval_screenspot_loss_num": 0.0882568359375, + "eval_screenspot_loss_xval": 2.2571614583333335, + "eval_screenspot_runtime": 105.2121, + "eval_screenspot_samples_per_second": 0.846, + "eval_screenspot_steps_per_second": 0.029, + "num_input_tokens_seen": 221101092, + "step": 1750 + }, + { + "epoch": 0.44891938690438016, + "loss": 2.137132167816162, + "loss_ce": 0.0013901516795158386, + "loss_iou": 0.87890625, + "loss_num": 0.07568359375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 221101092, + "step": 1750 + }, + { + "epoch": 0.44917591226832554, + "grad_norm": 65.04472351074219, + "learning_rate": 5e-06, + "loss": 1.0779, + "num_input_tokens_seen": 221227408, + "step": 1751 + }, + { + "epoch": 0.44917591226832554, + "loss": 1.097533941268921, + "loss_ce": 0.00036594917764887214, + "loss_iou": 0.515625, + "loss_num": 0.0137939453125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 221227408, + "step": 1751 + }, + { + "epoch": 0.44943243763227086, + "grad_norm": 58.349761962890625, + "learning_rate": 5e-06, + "loss": 1.1219, + "num_input_tokens_seen": 221354612, + "step": 1752 + }, + { + "epoch": 0.44943243763227086, + "loss": 1.2975740432739258, + "loss_ce": 0.0011872686445713043, + "loss_iou": 0.578125, + "loss_num": 0.0286865234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 221354612, + "step": 1752 + }, + { + "epoch": 0.44968896299621625, + "grad_norm": 40.07772445678711, + "learning_rate": 5e-06, + "loss": 1.2295, + "num_input_tokens_seen": 221480192, + "step": 1753 + }, + { + "epoch": 0.44968896299621625, + "loss": 1.2535150051116943, + "loss_ce": 0.0010735716205090284, + "loss_iou": 0.5703125, + "loss_num": 0.0224609375, + "loss_xval": 1.25, + "num_input_tokens_seen": 221480192, + "step": 1753 + }, + { + "epoch": 0.4499454883601616, + "grad_norm": 44.417903900146484, + "learning_rate": 5e-06, + "loss": 1.0847, + "num_input_tokens_seen": 221606036, + "step": 1754 + }, + { + "epoch": 0.4499454883601616, + "loss": 1.1093040704727173, + "loss_ce": 0.0013939510099589825, + "loss_iou": 0.51171875, + "loss_num": 0.017333984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 221606036, + "step": 1754 + }, + { + "epoch": 0.45020201372410695, + "grad_norm": 82.19584655761719, + "learning_rate": 5e-06, + "loss": 1.0316, + "num_input_tokens_seen": 221733884, + "step": 1755 + }, + { + "epoch": 0.45020201372410695, + "loss": 0.8864402770996094, + "loss_ce": 0.0021629538387060165, + "loss_iou": 0.416015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 221733884, + "step": 1755 + }, + { + "epoch": 0.45045853908805233, + "grad_norm": 54.95599365234375, + "learning_rate": 5e-06, + "loss": 1.1545, + "num_input_tokens_seen": 221859656, + "step": 1756 + }, + { + "epoch": 0.45045853908805233, + "loss": 1.0377905368804932, + "loss_ce": 0.0006811793427914381, + "loss_iou": 0.49609375, + "loss_num": 0.0093994140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 221859656, + "step": 1756 + }, + { + "epoch": 0.4507150644519977, + "grad_norm": 50.52367401123047, + "learning_rate": 5e-06, + "loss": 1.0899, + "num_input_tokens_seen": 221986800, + "step": 1757 + }, + { + "epoch": 0.4507150644519977, + "loss": 0.935173749923706, + "loss_ce": 0.0006034575053490698, + "loss_iou": 0.443359375, + "loss_num": 0.00994873046875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 221986800, + "step": 1757 + }, + { + "epoch": 0.45097158981594304, + "grad_norm": 67.1668701171875, + "learning_rate": 5e-06, + "loss": 1.1084, + "num_input_tokens_seen": 222113588, + "step": 1758 + }, + { + "epoch": 0.45097158981594304, + "loss": 1.1809331178665161, + "loss_ce": 0.007104948628693819, + "loss_iou": 0.51953125, + "loss_num": 0.027587890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 222113588, + "step": 1758 + }, + { + "epoch": 0.4512281151798884, + "grad_norm": 49.523834228515625, + "learning_rate": 5e-06, + "loss": 1.1212, + "num_input_tokens_seen": 222239476, + "step": 1759 + }, + { + "epoch": 0.4512281151798884, + "loss": 1.2047364711761475, + "loss_ce": 0.0011232678079977632, + "loss_iou": 0.53125, + "loss_num": 0.027587890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 222239476, + "step": 1759 + }, + { + "epoch": 0.45148464054383375, + "grad_norm": 39.00166702270508, + "learning_rate": 5e-06, + "loss": 0.9189, + "num_input_tokens_seen": 222365960, + "step": 1760 + }, + { + "epoch": 0.45148464054383375, + "loss": 0.8535187840461731, + "loss_ce": 0.000491458224132657, + "loss_iou": 0.390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 222365960, + "step": 1760 + }, + { + "epoch": 0.45174116590777913, + "grad_norm": 48.74122619628906, + "learning_rate": 5e-06, + "loss": 1.0079, + "num_input_tokens_seen": 222493120, + "step": 1761 + }, + { + "epoch": 0.45174116590777913, + "loss": 1.0591301918029785, + "loss_ce": 0.0010247546015307307, + "loss_iou": 0.484375, + "loss_num": 0.017578125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 222493120, + "step": 1761 + }, + { + "epoch": 0.4519976912717245, + "grad_norm": 57.42464065551758, + "learning_rate": 5e-06, + "loss": 1.1333, + "num_input_tokens_seen": 222618836, + "step": 1762 + }, + { + "epoch": 0.4519976912717245, + "loss": 1.053152322769165, + "loss_ce": 0.0018827388994395733, + "loss_iou": 0.48828125, + "loss_num": 0.0147705078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 222618836, + "step": 1762 + }, + { + "epoch": 0.45225421663566984, + "grad_norm": 53.764163970947266, + "learning_rate": 5e-06, + "loss": 1.1735, + "num_input_tokens_seen": 222744376, + "step": 1763 + }, + { + "epoch": 0.45225421663566984, + "loss": 1.3524705171585083, + "loss_ce": 0.0033494741655886173, + "loss_iou": 0.61328125, + "loss_num": 0.0247802734375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 222744376, + "step": 1763 + }, + { + "epoch": 0.4525107419996152, + "grad_norm": 73.12234497070312, + "learning_rate": 5e-06, + "loss": 1.0865, + "num_input_tokens_seen": 222871892, + "step": 1764 + }, + { + "epoch": 0.4525107419996152, + "loss": 0.9467097520828247, + "loss_ce": 0.003350441576912999, + "loss_iou": 0.43359375, + "loss_num": 0.014892578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 222871892, + "step": 1764 + }, + { + "epoch": 0.4527672673635606, + "grad_norm": 55.75526428222656, + "learning_rate": 5e-06, + "loss": 1.1035, + "num_input_tokens_seen": 222998220, + "step": 1765 + }, + { + "epoch": 0.4527672673635606, + "loss": 1.3217352628707886, + "loss_ce": 0.0009344975696876645, + "loss_iou": 0.59375, + "loss_num": 0.02587890625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 222998220, + "step": 1765 + }, + { + "epoch": 0.4530237927275059, + "grad_norm": 35.10811233520508, + "learning_rate": 5e-06, + "loss": 0.9668, + "num_input_tokens_seen": 223124288, + "step": 1766 + }, + { + "epoch": 0.4530237927275059, + "loss": 0.971388578414917, + "loss_ce": 0.0016620028764009476, + "loss_iou": 0.45703125, + "loss_num": 0.0108642578125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 223124288, + "step": 1766 + }, + { + "epoch": 0.4532803180914513, + "grad_norm": 41.83962631225586, + "learning_rate": 5e-06, + "loss": 1.0906, + "num_input_tokens_seen": 223251036, + "step": 1767 + }, + { + "epoch": 0.4532803180914513, + "loss": 1.3172807693481445, + "loss_ce": 0.003804178908467293, + "loss_iou": 0.59765625, + "loss_num": 0.0244140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 223251036, + "step": 1767 + }, + { + "epoch": 0.45353684345539663, + "grad_norm": 75.01993560791016, + "learning_rate": 5e-06, + "loss": 1.1356, + "num_input_tokens_seen": 223377328, + "step": 1768 + }, + { + "epoch": 0.45353684345539663, + "loss": 0.9791259169578552, + "loss_ce": 0.0025634029880166054, + "loss_iou": 0.451171875, + "loss_num": 0.0146484375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 223377328, + "step": 1768 + }, + { + "epoch": 0.453793368819342, + "grad_norm": 53.41657257080078, + "learning_rate": 5e-06, + "loss": 1.1458, + "num_input_tokens_seen": 223504412, + "step": 1769 + }, + { + "epoch": 0.453793368819342, + "loss": 1.106865406036377, + "loss_ce": 0.0023732022382318974, + "loss_iou": 0.51171875, + "loss_num": 0.0157470703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 223504412, + "step": 1769 + }, + { + "epoch": 0.4540498941832874, + "grad_norm": 21.908044815063477, + "learning_rate": 5e-06, + "loss": 0.9441, + "num_input_tokens_seen": 223630272, + "step": 1770 + }, + { + "epoch": 0.4540498941832874, + "loss": 0.8534073233604431, + "loss_ce": 0.0008682726183906198, + "loss_iou": 0.3984375, + "loss_num": 0.0115966796875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 223630272, + "step": 1770 + }, + { + "epoch": 0.4543064195472327, + "grad_norm": 20.499309539794922, + "learning_rate": 5e-06, + "loss": 0.9913, + "num_input_tokens_seen": 223756240, + "step": 1771 + }, + { + "epoch": 0.4543064195472327, + "loss": 0.8340408802032471, + "loss_ce": 0.0003006830520462245, + "loss_iou": 0.37890625, + "loss_num": 0.01495361328125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 223756240, + "step": 1771 + }, + { + "epoch": 0.4545629449111781, + "grad_norm": 44.41531753540039, + "learning_rate": 5e-06, + "loss": 1.2043, + "num_input_tokens_seen": 223882360, + "step": 1772 + }, + { + "epoch": 0.4545629449111781, + "loss": 1.0591591596603394, + "loss_ce": 0.005448200739920139, + "loss_iou": 0.47265625, + "loss_num": 0.021240234375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 223882360, + "step": 1772 + }, + { + "epoch": 0.4548194702751234, + "grad_norm": 53.299381256103516, + "learning_rate": 5e-06, + "loss": 1.1289, + "num_input_tokens_seen": 224007764, + "step": 1773 + }, + { + "epoch": 0.4548194702751234, + "loss": 1.4351820945739746, + "loss_ce": 0.0006116722943261266, + "loss_iou": 0.65234375, + "loss_num": 0.0252685546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 224007764, + "step": 1773 + }, + { + "epoch": 0.4550759956390688, + "grad_norm": 51.93644714355469, + "learning_rate": 5e-06, + "loss": 1.0473, + "num_input_tokens_seen": 224134064, + "step": 1774 + }, + { + "epoch": 0.4550759956390688, + "loss": 1.0699505805969238, + "loss_ce": 0.0030560598243027925, + "loss_iou": 0.49609375, + "loss_num": 0.01544189453125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 224134064, + "step": 1774 + }, + { + "epoch": 0.4553325210030142, + "grad_norm": 58.659854888916016, + "learning_rate": 5e-06, + "loss": 1.1839, + "num_input_tokens_seen": 224259744, + "step": 1775 + }, + { + "epoch": 0.4553325210030142, + "loss": 1.148506999015808, + "loss_ce": 0.0005577196134254336, + "loss_iou": 0.52734375, + "loss_num": 0.01806640625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 224259744, + "step": 1775 + }, + { + "epoch": 0.4555890463669595, + "grad_norm": 47.804054260253906, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 224386844, + "step": 1776 + }, + { + "epoch": 0.4555890463669595, + "loss": 0.8405540585517883, + "loss_ce": 0.0016868961974978447, + "loss_iou": 0.392578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 224386844, + "step": 1776 + }, + { + "epoch": 0.4558455717309049, + "grad_norm": 46.61387252807617, + "learning_rate": 5e-06, + "loss": 1.0408, + "num_input_tokens_seen": 224513500, + "step": 1777 + }, + { + "epoch": 0.4558455717309049, + "loss": 1.0193055868148804, + "loss_ce": 0.005877839867025614, + "loss_iou": 0.45703125, + "loss_num": 0.020263671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 224513500, + "step": 1777 + }, + { + "epoch": 0.4561020970948503, + "grad_norm": 58.60307312011719, + "learning_rate": 5e-06, + "loss": 1.0395, + "num_input_tokens_seen": 224639844, + "step": 1778 + }, + { + "epoch": 0.4561020970948503, + "loss": 1.1209123134613037, + "loss_ce": 0.0042130243964493275, + "loss_iou": 0.494140625, + "loss_num": 0.0255126953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 224639844, + "step": 1778 + }, + { + "epoch": 0.4563586224587956, + "grad_norm": 57.254234313964844, + "learning_rate": 5e-06, + "loss": 1.1969, + "num_input_tokens_seen": 224766448, + "step": 1779 + }, + { + "epoch": 0.4563586224587956, + "loss": 1.154077410697937, + "loss_ce": 0.0017336651217192411, + "loss_iou": 0.54296875, + "loss_num": 0.01318359375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 224766448, + "step": 1779 + }, + { + "epoch": 0.456615147822741, + "grad_norm": 47.691226959228516, + "learning_rate": 5e-06, + "loss": 0.99, + "num_input_tokens_seen": 224892048, + "step": 1780 + }, + { + "epoch": 0.456615147822741, + "loss": 0.8634731769561768, + "loss_ce": 0.008004425093531609, + "loss_iou": 0.412109375, + "loss_num": 0.0062255859375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 224892048, + "step": 1780 + }, + { + "epoch": 0.4568716731866863, + "grad_norm": 71.93791198730469, + "learning_rate": 5e-06, + "loss": 1.1428, + "num_input_tokens_seen": 225019348, + "step": 1781 + }, + { + "epoch": 0.4568716731866863, + "loss": 1.0822391510009766, + "loss_ce": 0.0006961679318919778, + "loss_iou": 0.5078125, + "loss_num": 0.01336669921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 225019348, + "step": 1781 + }, + { + "epoch": 0.4571281985506317, + "grad_norm": 77.20756530761719, + "learning_rate": 5e-06, + "loss": 1.2089, + "num_input_tokens_seen": 225145096, + "step": 1782 + }, + { + "epoch": 0.4571281985506317, + "loss": 1.2265117168426514, + "loss_ce": 0.0023906128481030464, + "loss_iou": 0.5625, + "loss_num": 0.020263671875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 225145096, + "step": 1782 + }, + { + "epoch": 0.4573847239145771, + "grad_norm": 55.06850051879883, + "learning_rate": 5e-06, + "loss": 1.2276, + "num_input_tokens_seen": 225272296, + "step": 1783 + }, + { + "epoch": 0.4573847239145771, + "loss": 1.2547285556793213, + "loss_ce": 0.001798930810764432, + "loss_iou": 0.56640625, + "loss_num": 0.0234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 225272296, + "step": 1783 + }, + { + "epoch": 0.4576412492785224, + "grad_norm": 47.69963073730469, + "learning_rate": 5e-06, + "loss": 1.1367, + "num_input_tokens_seen": 225399192, + "step": 1784 + }, + { + "epoch": 0.4576412492785224, + "loss": 1.0881521701812744, + "loss_ce": 0.0007497383048757911, + "loss_iou": 0.50390625, + "loss_num": 0.0159912109375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 225399192, + "step": 1784 + }, + { + "epoch": 0.4578977746424678, + "grad_norm": 50.797889709472656, + "learning_rate": 5e-06, + "loss": 1.1077, + "num_input_tokens_seen": 225524540, + "step": 1785 + }, + { + "epoch": 0.4578977746424678, + "loss": 1.0107976198196411, + "loss_ce": 0.003961687907576561, + "loss_iou": 0.47265625, + "loss_num": 0.01214599609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 225524540, + "step": 1785 + }, + { + "epoch": 0.4581543000064131, + "grad_norm": 70.14015197753906, + "learning_rate": 5e-06, + "loss": 1.146, + "num_input_tokens_seen": 225650852, + "step": 1786 + }, + { + "epoch": 0.4581543000064131, + "loss": 1.058988094329834, + "loss_ce": 0.0013709395425394177, + "loss_iou": 0.486328125, + "loss_num": 0.0174560546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 225650852, + "step": 1786 + }, + { + "epoch": 0.4584108253703585, + "grad_norm": 45.43425369262695, + "learning_rate": 5e-06, + "loss": 1.1859, + "num_input_tokens_seen": 225777556, + "step": 1787 + }, + { + "epoch": 0.4584108253703585, + "loss": 1.1797524690628052, + "loss_ce": 0.0020180712454020977, + "loss_iou": 0.54296875, + "loss_num": 0.01806640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 225777556, + "step": 1787 + }, + { + "epoch": 0.45866735073430387, + "grad_norm": 40.239158630371094, + "learning_rate": 5e-06, + "loss": 1.0156, + "num_input_tokens_seen": 225903552, + "step": 1788 + }, + { + "epoch": 0.45866735073430387, + "loss": 0.9056026935577393, + "loss_ce": 0.0010616483632475138, + "loss_iou": 0.427734375, + "loss_num": 0.00994873046875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 225903552, + "step": 1788 + }, + { + "epoch": 0.4589238760982492, + "grad_norm": 52.5267219543457, + "learning_rate": 5e-06, + "loss": 1.1194, + "num_input_tokens_seen": 226029732, + "step": 1789 + }, + { + "epoch": 0.4589238760982492, + "loss": 0.9817174077033997, + "loss_ce": 0.00027208661776967347, + "loss_iou": 0.47265625, + "loss_num": 0.007720947265625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 226029732, + "step": 1789 + }, + { + "epoch": 0.4591804014621946, + "grad_norm": 41.42628479003906, + "learning_rate": 5e-06, + "loss": 1.0112, + "num_input_tokens_seen": 226156232, + "step": 1790 + }, + { + "epoch": 0.4591804014621946, + "loss": 1.1469049453735352, + "loss_ce": 0.000908812799025327, + "loss_iou": 0.51953125, + "loss_num": 0.0208740234375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 226156232, + "step": 1790 + }, + { + "epoch": 0.45943692682613996, + "grad_norm": 35.50095748901367, + "learning_rate": 5e-06, + "loss": 1.0287, + "num_input_tokens_seen": 226282468, + "step": 1791 + }, + { + "epoch": 0.45943692682613996, + "loss": 0.917578399181366, + "loss_ce": 9.793409117264673e-05, + "loss_iou": 0.4296875, + "loss_num": 0.01171875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 226282468, + "step": 1791 + }, + { + "epoch": 0.4596934521900853, + "grad_norm": 41.90004348754883, + "learning_rate": 5e-06, + "loss": 1.1602, + "num_input_tokens_seen": 226407500, + "step": 1792 + }, + { + "epoch": 0.4596934521900853, + "loss": 1.079375147819519, + "loss_ce": 0.00125010940246284, + "loss_iou": 0.498046875, + "loss_num": 0.0166015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 226407500, + "step": 1792 + }, + { + "epoch": 0.45994997755403066, + "grad_norm": 45.10520935058594, + "learning_rate": 5e-06, + "loss": 1.0372, + "num_input_tokens_seen": 226532952, + "step": 1793 + }, + { + "epoch": 0.45994997755403066, + "loss": 1.0435144901275635, + "loss_ce": 0.0005456857616081834, + "loss_iou": 0.4765625, + "loss_num": 0.017822265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 226532952, + "step": 1793 + }, + { + "epoch": 0.460206502917976, + "grad_norm": 42.57160568237305, + "learning_rate": 5e-06, + "loss": 1.0469, + "num_input_tokens_seen": 226659088, + "step": 1794 + }, + { + "epoch": 0.460206502917976, + "loss": 0.8785485029220581, + "loss_ce": 0.0015953680267557502, + "loss_iou": 0.41015625, + "loss_num": 0.011474609375, + "loss_xval": 0.875, + "num_input_tokens_seen": 226659088, + "step": 1794 + }, + { + "epoch": 0.46046302828192137, + "grad_norm": 85.52832794189453, + "learning_rate": 5e-06, + "loss": 1.1498, + "num_input_tokens_seen": 226785940, + "step": 1795 + }, + { + "epoch": 0.46046302828192137, + "loss": 1.0807669162750244, + "loss_ce": 0.003130094613879919, + "loss_iou": 0.4921875, + "loss_num": 0.0181884765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 226785940, + "step": 1795 + }, + { + "epoch": 0.46071955364586675, + "grad_norm": 43.86018753051758, + "learning_rate": 5e-06, + "loss": 1.1319, + "num_input_tokens_seen": 226911396, + "step": 1796 + }, + { + "epoch": 0.46071955364586675, + "loss": 1.051501989364624, + "loss_ce": 0.0016974173486232758, + "loss_iou": 0.48828125, + "loss_num": 0.01434326171875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 226911396, + "step": 1796 + }, + { + "epoch": 0.4609760790098121, + "grad_norm": 32.8549690246582, + "learning_rate": 5e-06, + "loss": 1.1345, + "num_input_tokens_seen": 227037092, + "step": 1797 + }, + { + "epoch": 0.4609760790098121, + "loss": 1.0192904472351074, + "loss_ce": 0.0029329685494303703, + "loss_iou": 0.45703125, + "loss_num": 0.0206298828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 227037092, + "step": 1797 + }, + { + "epoch": 0.46123260437375746, + "grad_norm": 30.514177322387695, + "learning_rate": 5e-06, + "loss": 1.1265, + "num_input_tokens_seen": 227163092, + "step": 1798 + }, + { + "epoch": 0.46123260437375746, + "loss": 1.148848533630371, + "loss_ce": 0.0018758311634883285, + "loss_iou": 0.515625, + "loss_num": 0.0238037109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 227163092, + "step": 1798 + }, + { + "epoch": 0.46148912973770284, + "grad_norm": 41.02392578125, + "learning_rate": 5e-06, + "loss": 1.0624, + "num_input_tokens_seen": 227288884, + "step": 1799 + }, + { + "epoch": 0.46148912973770284, + "loss": 1.4358460903167725, + "loss_ce": 0.0012757527874782681, + "loss_iou": 0.66015625, + "loss_num": 0.022705078125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 227288884, + "step": 1799 + }, + { + "epoch": 0.46174565510164817, + "grad_norm": 40.647464752197266, + "learning_rate": 5e-06, + "loss": 0.9747, + "num_input_tokens_seen": 227415712, + "step": 1800 + }, + { + "epoch": 0.46174565510164817, + "loss": 1.0220820903778076, + "loss_ce": 0.00499228248372674, + "loss_iou": 0.462890625, + "loss_num": 0.01806640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 227415712, + "step": 1800 + }, + { + "epoch": 0.46200218046559355, + "grad_norm": 45.96141052246094, + "learning_rate": 5e-06, + "loss": 1.0637, + "num_input_tokens_seen": 227542180, + "step": 1801 + }, + { + "epoch": 0.46200218046559355, + "loss": 1.1472458839416504, + "loss_ce": 0.004667793866246939, + "loss_iou": 0.498046875, + "loss_num": 0.0286865234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 227542180, + "step": 1801 + }, + { + "epoch": 0.4622587058295389, + "grad_norm": 48.89726638793945, + "learning_rate": 5e-06, + "loss": 1.0363, + "num_input_tokens_seen": 227668608, + "step": 1802 + }, + { + "epoch": 0.4622587058295389, + "loss": 0.9320327043533325, + "loss_ce": 0.00039204751374199986, + "loss_iou": 0.439453125, + "loss_num": 0.0103759765625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 227668608, + "step": 1802 + }, + { + "epoch": 0.46251523119348426, + "grad_norm": 47.7996826171875, + "learning_rate": 5e-06, + "loss": 1.0521, + "num_input_tokens_seen": 227794348, + "step": 1803 + }, + { + "epoch": 0.46251523119348426, + "loss": 0.9851279854774475, + "loss_ce": 0.001241211430169642, + "loss_iou": 0.46484375, + "loss_num": 0.010498046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 227794348, + "step": 1803 + }, + { + "epoch": 0.46277175655742964, + "grad_norm": 55.55686950683594, + "learning_rate": 5e-06, + "loss": 0.96, + "num_input_tokens_seen": 227920024, + "step": 1804 + }, + { + "epoch": 0.46277175655742964, + "loss": 0.8306246995925903, + "loss_ce": 0.0005465293070301414, + "loss_iou": 0.39453125, + "loss_num": 0.0081787109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 227920024, + "step": 1804 + }, + { + "epoch": 0.46302828192137496, + "grad_norm": 62.18184280395508, + "learning_rate": 5e-06, + "loss": 1.0035, + "num_input_tokens_seen": 228046804, + "step": 1805 + }, + { + "epoch": 0.46302828192137496, + "loss": 1.0716681480407715, + "loss_ce": 0.002332109957933426, + "loss_iou": 0.5, + "loss_num": 0.013671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 228046804, + "step": 1805 + }, + { + "epoch": 0.46328480728532034, + "grad_norm": 51.51101303100586, + "learning_rate": 5e-06, + "loss": 1.1291, + "num_input_tokens_seen": 228172136, + "step": 1806 + }, + { + "epoch": 0.46328480728532034, + "loss": 1.269708514213562, + "loss_ce": 0.0031068851239979267, + "loss_iou": 0.578125, + "loss_num": 0.0218505859375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 228172136, + "step": 1806 + }, + { + "epoch": 0.46354133264926567, + "grad_norm": 57.650482177734375, + "learning_rate": 5e-06, + "loss": 1.0305, + "num_input_tokens_seen": 228299092, + "step": 1807 + }, + { + "epoch": 0.46354133264926567, + "loss": 0.9157262444496155, + "loss_ce": 0.0016637363005429506, + "loss_iou": 0.42578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 228299092, + "step": 1807 + }, + { + "epoch": 0.46379785801321105, + "grad_norm": 51.743247985839844, + "learning_rate": 5e-06, + "loss": 1.0898, + "num_input_tokens_seen": 228425312, + "step": 1808 + }, + { + "epoch": 0.46379785801321105, + "loss": 1.1288851499557495, + "loss_ce": 0.0004672062932513654, + "loss_iou": 0.51171875, + "loss_num": 0.021484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 228425312, + "step": 1808 + }, + { + "epoch": 0.46405438337715643, + "grad_norm": 55.841487884521484, + "learning_rate": 5e-06, + "loss": 1.0511, + "num_input_tokens_seen": 228551700, + "step": 1809 + }, + { + "epoch": 0.46405438337715643, + "loss": 1.0749510526657104, + "loss_ce": 0.0012206027749925852, + "loss_iou": 0.49609375, + "loss_num": 0.0166015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 228551700, + "step": 1809 + }, + { + "epoch": 0.46431090874110176, + "grad_norm": 53.72734069824219, + "learning_rate": 5e-06, + "loss": 1.171, + "num_input_tokens_seen": 228678904, + "step": 1810 + }, + { + "epoch": 0.46431090874110176, + "loss": 1.3807734251022339, + "loss_ce": 0.004308619536459446, + "loss_iou": 0.60546875, + "loss_num": 0.033203125, + "loss_xval": 1.375, + "num_input_tokens_seen": 228678904, + "step": 1810 + }, + { + "epoch": 0.46456743410504714, + "grad_norm": 50.47077560424805, + "learning_rate": 5e-06, + "loss": 1.0327, + "num_input_tokens_seen": 228805080, + "step": 1811 + }, + { + "epoch": 0.46456743410504714, + "loss": 0.9544177651405334, + "loss_ce": 0.0017810547724366188, + "loss_iou": 0.4375, + "loss_num": 0.01513671875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 228805080, + "step": 1811 + }, + { + "epoch": 0.4648239594689925, + "grad_norm": 45.25423049926758, + "learning_rate": 5e-06, + "loss": 1.0238, + "num_input_tokens_seen": 228930680, + "step": 1812 + }, + { + "epoch": 0.4648239594689925, + "loss": 1.1679481267929077, + "loss_ce": 0.0004676592070609331, + "loss_iou": 0.54296875, + "loss_num": 0.016845703125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 228930680, + "step": 1812 + }, + { + "epoch": 0.46508048483293785, + "grad_norm": 33.005863189697266, + "learning_rate": 5e-06, + "loss": 0.9474, + "num_input_tokens_seen": 229057132, + "step": 1813 + }, + { + "epoch": 0.46508048483293785, + "loss": 1.0210117101669312, + "loss_ce": 0.0005038633826188743, + "loss_iou": 0.470703125, + "loss_num": 0.0159912109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 229057132, + "step": 1813 + }, + { + "epoch": 0.4653370101968832, + "grad_norm": 48.521583557128906, + "learning_rate": 5e-06, + "loss": 1.0525, + "num_input_tokens_seen": 229183564, + "step": 1814 + }, + { + "epoch": 0.4653370101968832, + "loss": 1.021003246307373, + "loss_ce": 0.0009836989920586348, + "loss_iou": 0.4609375, + "loss_num": 0.0194091796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 229183564, + "step": 1814 + }, + { + "epoch": 0.46559353556082855, + "grad_norm": 52.33184051513672, + "learning_rate": 5e-06, + "loss": 1.0733, + "num_input_tokens_seen": 229310196, + "step": 1815 + }, + { + "epoch": 0.46559353556082855, + "loss": 1.217082142829895, + "loss_ce": 0.005656345281749964, + "loss_iou": 0.56640625, + "loss_num": 0.0157470703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 229310196, + "step": 1815 + }, + { + "epoch": 0.46585006092477393, + "grad_norm": 77.88722229003906, + "learning_rate": 5e-06, + "loss": 1.0679, + "num_input_tokens_seen": 229435928, + "step": 1816 + }, + { + "epoch": 0.46585006092477393, + "loss": 1.2572338581085205, + "loss_ce": 0.00039804953848943114, + "loss_iou": 0.578125, + "loss_num": 0.0208740234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 229435928, + "step": 1816 + }, + { + "epoch": 0.4661065862887193, + "grad_norm": 54.415672302246094, + "learning_rate": 5e-06, + "loss": 1.0738, + "num_input_tokens_seen": 229562624, + "step": 1817 + }, + { + "epoch": 0.4661065862887193, + "loss": 0.970221221446991, + "loss_ce": 0.0004946249537169933, + "loss_iou": 0.45703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 229562624, + "step": 1817 + }, + { + "epoch": 0.46636311165266464, + "grad_norm": 47.427982330322266, + "learning_rate": 5e-06, + "loss": 1.015, + "num_input_tokens_seen": 229688444, + "step": 1818 + }, + { + "epoch": 0.46636311165266464, + "loss": 1.1745421886444092, + "loss_ce": 0.004132051952183247, + "loss_iou": 0.5390625, + "loss_num": 0.017822265625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 229688444, + "step": 1818 + }, + { + "epoch": 0.46661963701661, + "grad_norm": 59.17680358886719, + "learning_rate": 5e-06, + "loss": 1.0897, + "num_input_tokens_seen": 229815112, + "step": 1819 + }, + { + "epoch": 0.46661963701661, + "loss": 1.2227951288223267, + "loss_ce": 0.002580307424068451, + "loss_iou": 0.55859375, + "loss_num": 0.0208740234375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 229815112, + "step": 1819 + }, + { + "epoch": 0.4668761623805554, + "grad_norm": 71.73004150390625, + "learning_rate": 5e-06, + "loss": 1.0502, + "num_input_tokens_seen": 229939360, + "step": 1820 + }, + { + "epoch": 0.4668761623805554, + "loss": 0.9030512571334839, + "loss_ce": 0.003148975083604455, + "loss_iou": 0.427734375, + "loss_num": 0.0087890625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 229939360, + "step": 1820 + }, + { + "epoch": 0.46713268774450073, + "grad_norm": 50.68292236328125, + "learning_rate": 5e-06, + "loss": 1.1393, + "num_input_tokens_seen": 230065904, + "step": 1821 + }, + { + "epoch": 0.46713268774450073, + "loss": 1.1499302387237549, + "loss_ce": 0.0005162759916856885, + "loss_iou": 0.546875, + "loss_num": 0.01141357421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 230065904, + "step": 1821 + }, + { + "epoch": 0.4673892131084461, + "grad_norm": 25.59304428100586, + "learning_rate": 5e-06, + "loss": 1.0622, + "num_input_tokens_seen": 230192408, + "step": 1822 + }, + { + "epoch": 0.4673892131084461, + "loss": 1.0051422119140625, + "loss_ce": 0.00025939734769053757, + "loss_iou": 0.466796875, + "loss_num": 0.01458740234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 230192408, + "step": 1822 + }, + { + "epoch": 0.46764573847239144, + "grad_norm": 44.37813186645508, + "learning_rate": 5e-06, + "loss": 1.0218, + "num_input_tokens_seen": 230318620, + "step": 1823 + }, + { + "epoch": 0.46764573847239144, + "loss": 0.9911173582077026, + "loss_ce": 0.0013712949585169554, + "loss_iou": 0.474609375, + "loss_num": 0.00830078125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 230318620, + "step": 1823 + }, + { + "epoch": 0.4679022638363368, + "grad_norm": 61.45755386352539, + "learning_rate": 5e-06, + "loss": 1.0956, + "num_input_tokens_seen": 230444432, + "step": 1824 + }, + { + "epoch": 0.4679022638363368, + "loss": 0.9432532787322998, + "loss_ce": 0.0003821811988018453, + "loss_iou": 0.447265625, + "loss_num": 0.009765625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 230444432, + "step": 1824 + }, + { + "epoch": 0.4681587892002822, + "grad_norm": 58.02901077270508, + "learning_rate": 5e-06, + "loss": 1.0768, + "num_input_tokens_seen": 230570904, + "step": 1825 + }, + { + "epoch": 0.4681587892002822, + "loss": 1.2288751602172852, + "loss_ce": 0.0018243174999952316, + "loss_iou": 0.546875, + "loss_num": 0.0263671875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 230570904, + "step": 1825 + }, + { + "epoch": 0.4684153145642275, + "grad_norm": 75.79741668701172, + "learning_rate": 5e-06, + "loss": 0.9345, + "num_input_tokens_seen": 230696964, + "step": 1826 + }, + { + "epoch": 0.4684153145642275, + "loss": 0.9910364151000977, + "loss_ce": 0.0012903306633234024, + "loss_iou": 0.46484375, + "loss_num": 0.01220703125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 230696964, + "step": 1826 + }, + { + "epoch": 0.4686718399281729, + "grad_norm": 54.42466354370117, + "learning_rate": 5e-06, + "loss": 1.1802, + "num_input_tokens_seen": 230823120, + "step": 1827 + }, + { + "epoch": 0.4686718399281729, + "loss": 1.226477026939392, + "loss_ce": 0.008215289562940598, + "loss_iou": 0.5546875, + "loss_num": 0.0216064453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 230823120, + "step": 1827 + }, + { + "epoch": 0.46892836529211823, + "grad_norm": 30.82295036315918, + "learning_rate": 5e-06, + "loss": 0.9543, + "num_input_tokens_seen": 230949708, + "step": 1828 + }, + { + "epoch": 0.46892836529211823, + "loss": 1.1079235076904297, + "loss_ce": 0.003919580020010471, + "loss_iou": 0.515625, + "loss_num": 0.01446533203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 230949708, + "step": 1828 + }, + { + "epoch": 0.4691848906560636, + "grad_norm": 44.75107955932617, + "learning_rate": 5e-06, + "loss": 1.0366, + "num_input_tokens_seen": 231075804, + "step": 1829 + }, + { + "epoch": 0.4691848906560636, + "loss": 1.149052381515503, + "loss_ce": 0.0011031217873096466, + "loss_iou": 0.515625, + "loss_num": 0.0228271484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 231075804, + "step": 1829 + }, + { + "epoch": 0.469441416020009, + "grad_norm": 67.44160461425781, + "learning_rate": 5e-06, + "loss": 0.9795, + "num_input_tokens_seen": 231202088, + "step": 1830 + }, + { + "epoch": 0.469441416020009, + "loss": 0.9768046140670776, + "loss_ce": 0.00024208203831221908, + "loss_iou": 0.466796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 231202088, + "step": 1830 + }, + { + "epoch": 0.4696979413839543, + "grad_norm": 54.214454650878906, + "learning_rate": 5e-06, + "loss": 1.1204, + "num_input_tokens_seen": 231328004, + "step": 1831 + }, + { + "epoch": 0.4696979413839543, + "loss": 1.2453057765960693, + "loss_ce": 0.0050712935626506805, + "loss_iou": 0.56640625, + "loss_num": 0.0208740234375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 231328004, + "step": 1831 + }, + { + "epoch": 0.4699544667478997, + "grad_norm": 49.22771072387695, + "learning_rate": 5e-06, + "loss": 1.2341, + "num_input_tokens_seen": 231454460, + "step": 1832 + }, + { + "epoch": 0.4699544667478997, + "loss": 1.3006293773651123, + "loss_ce": 0.0047310153022408485, + "loss_iou": 0.578125, + "loss_num": 0.0284423828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 231454460, + "step": 1832 + }, + { + "epoch": 0.4702109921118451, + "grad_norm": 63.14383316040039, + "learning_rate": 5e-06, + "loss": 1.0533, + "num_input_tokens_seen": 231581120, + "step": 1833 + }, + { + "epoch": 0.4702109921118451, + "loss": 1.021841287612915, + "loss_ce": 0.002798343077301979, + "loss_iou": 0.4765625, + "loss_num": 0.012939453125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 231581120, + "step": 1833 + }, + { + "epoch": 0.4704675174757904, + "grad_norm": 66.81879425048828, + "learning_rate": 5e-06, + "loss": 1.1464, + "num_input_tokens_seen": 231707140, + "step": 1834 + }, + { + "epoch": 0.4704675174757904, + "loss": 1.1984933614730835, + "loss_ce": 0.001227812608703971, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 231707140, + "step": 1834 + }, + { + "epoch": 0.4707240428397358, + "grad_norm": 51.97833251953125, + "learning_rate": 5e-06, + "loss": 1.0523, + "num_input_tokens_seen": 231832556, + "step": 1835 + }, + { + "epoch": 0.4707240428397358, + "loss": 0.9346357583999634, + "loss_ce": 0.001041956478729844, + "loss_iou": 0.44921875, + "loss_num": 0.00750732421875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 231832556, + "step": 1835 + }, + { + "epoch": 0.4709805682036811, + "grad_norm": 43.52521896362305, + "learning_rate": 5e-06, + "loss": 1.2287, + "num_input_tokens_seen": 231958360, + "step": 1836 + }, + { + "epoch": 0.4709805682036811, + "loss": 1.0660685300827026, + "loss_ce": 0.00015054289542604238, + "loss_iou": 0.5078125, + "loss_num": 0.01055908203125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 231958360, + "step": 1836 + }, + { + "epoch": 0.4712370935676265, + "grad_norm": 41.290470123291016, + "learning_rate": 5e-06, + "loss": 1.0521, + "num_input_tokens_seen": 232084084, + "step": 1837 + }, + { + "epoch": 0.4712370935676265, + "loss": 1.0947554111480713, + "loss_ce": 0.01077099796384573, + "loss_iou": 0.5078125, + "loss_num": 0.01422119140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 232084084, + "step": 1837 + }, + { + "epoch": 0.4714936189315719, + "grad_norm": 47.83306121826172, + "learning_rate": 5e-06, + "loss": 1.0932, + "num_input_tokens_seen": 232211160, + "step": 1838 + }, + { + "epoch": 0.4714936189315719, + "loss": 1.3389394283294678, + "loss_ce": 0.0034902358893305063, + "loss_iou": 0.5859375, + "loss_num": 0.032958984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 232211160, + "step": 1838 + }, + { + "epoch": 0.4717501442955172, + "grad_norm": 51.278358459472656, + "learning_rate": 5e-06, + "loss": 1.0171, + "num_input_tokens_seen": 232337616, + "step": 1839 + }, + { + "epoch": 0.4717501442955172, + "loss": 1.1149177551269531, + "loss_ce": 0.007495948113501072, + "loss_iou": 0.51171875, + "loss_num": 0.0164794921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 232337616, + "step": 1839 + }, + { + "epoch": 0.4720066696594626, + "grad_norm": 55.204227447509766, + "learning_rate": 5e-06, + "loss": 0.9838, + "num_input_tokens_seen": 232463500, + "step": 1840 + }, + { + "epoch": 0.4720066696594626, + "loss": 0.958401083946228, + "loss_ce": 0.005764373578131199, + "loss_iou": 0.44921875, + "loss_num": 0.0111083984375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 232463500, + "step": 1840 + }, + { + "epoch": 0.47226319502340797, + "grad_norm": 59.67417526245117, + "learning_rate": 5e-06, + "loss": 1.0707, + "num_input_tokens_seen": 232589756, + "step": 1841 + }, + { + "epoch": 0.47226319502340797, + "loss": 0.9808272123336792, + "loss_ce": 0.00035839201882481575, + "loss_iou": 0.45703125, + "loss_num": 0.01275634765625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 232589756, + "step": 1841 + }, + { + "epoch": 0.4725197203873533, + "grad_norm": 62.38352966308594, + "learning_rate": 5e-06, + "loss": 1.1771, + "num_input_tokens_seen": 232716344, + "step": 1842 + }, + { + "epoch": 0.4725197203873533, + "loss": 1.3608180284500122, + "loss_ce": 0.0029079453088343143, + "loss_iou": 0.62890625, + "loss_num": 0.0194091796875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 232716344, + "step": 1842 + }, + { + "epoch": 0.4727762457512987, + "grad_norm": 41.38759231567383, + "learning_rate": 5e-06, + "loss": 1.153, + "num_input_tokens_seen": 232842324, + "step": 1843 + }, + { + "epoch": 0.4727762457512987, + "loss": 1.125563621520996, + "loss_ce": 0.0030050266068428755, + "loss_iou": 0.51953125, + "loss_num": 0.0169677734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 232842324, + "step": 1843 + }, + { + "epoch": 0.473032771115244, + "grad_norm": 46.98886489868164, + "learning_rate": 5e-06, + "loss": 1.0847, + "num_input_tokens_seen": 232969524, + "step": 1844 + }, + { + "epoch": 0.473032771115244, + "loss": 1.0844138860702515, + "loss_ce": 0.0009177774772979319, + "loss_iou": 0.51171875, + "loss_num": 0.01239013671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 232969524, + "step": 1844 + }, + { + "epoch": 0.4732892964791894, + "grad_norm": 51.617313385009766, + "learning_rate": 5e-06, + "loss": 1.1123, + "num_input_tokens_seen": 233095848, + "step": 1845 + }, + { + "epoch": 0.4732892964791894, + "loss": 1.156531572341919, + "loss_ce": 0.003211225150153041, + "loss_iou": 0.5078125, + "loss_num": 0.0274658203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 233095848, + "step": 1845 + }, + { + "epoch": 0.47354582184313476, + "grad_norm": 42.13440704345703, + "learning_rate": 5e-06, + "loss": 1.009, + "num_input_tokens_seen": 233221696, + "step": 1846 + }, + { + "epoch": 0.47354582184313476, + "loss": 1.0593225955963135, + "loss_ce": 0.0012170892441645265, + "loss_iou": 0.482421875, + "loss_num": 0.0184326171875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 233221696, + "step": 1846 + }, + { + "epoch": 0.4738023472070801, + "grad_norm": 41.52468490600586, + "learning_rate": 5e-06, + "loss": 1.1316, + "num_input_tokens_seen": 233347744, + "step": 1847 + }, + { + "epoch": 0.4738023472070801, + "loss": 1.277927279472351, + "loss_ce": 0.000583487271796912, + "loss_iou": 0.55859375, + "loss_num": 0.031494140625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 233347744, + "step": 1847 + }, + { + "epoch": 0.47405887257102547, + "grad_norm": 61.751182556152344, + "learning_rate": 5e-06, + "loss": 1.1525, + "num_input_tokens_seen": 233474124, + "step": 1848 + }, + { + "epoch": 0.47405887257102547, + "loss": 1.232661485671997, + "loss_ce": 0.0007280077552422881, + "loss_iou": 0.53125, + "loss_num": 0.033203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 233474124, + "step": 1848 + }, + { + "epoch": 0.4743153979349708, + "grad_norm": 49.26096725463867, + "learning_rate": 5e-06, + "loss": 1.0396, + "num_input_tokens_seen": 233599284, + "step": 1849 + }, + { + "epoch": 0.4743153979349708, + "loss": 0.8932523727416992, + "loss_ce": 0.003115639090538025, + "loss_iou": 0.419921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 233599284, + "step": 1849 + }, + { + "epoch": 0.4745719232989162, + "grad_norm": 54.866111755371094, + "learning_rate": 5e-06, + "loss": 0.9719, + "num_input_tokens_seen": 233725764, + "step": 1850 + }, + { + "epoch": 0.4745719232989162, + "loss": 1.1756889820098877, + "loss_ce": 0.00039602600736543536, + "loss_iou": 0.53125, + "loss_num": 0.023193359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 233725764, + "step": 1850 + }, + { + "epoch": 0.47482844866286156, + "grad_norm": 55.81576156616211, + "learning_rate": 5e-06, + "loss": 1.0897, + "num_input_tokens_seen": 233851700, + "step": 1851 + }, + { + "epoch": 0.47482844866286156, + "loss": 1.1065967082977295, + "loss_ce": 0.0030810441821813583, + "loss_iou": 0.5, + "loss_num": 0.0208740234375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 233851700, + "step": 1851 + }, + { + "epoch": 0.4750849740268069, + "grad_norm": 53.458316802978516, + "learning_rate": 5e-06, + "loss": 1.0422, + "num_input_tokens_seen": 233976692, + "step": 1852 + }, + { + "epoch": 0.4750849740268069, + "loss": 1.0994961261749268, + "loss_ce": 0.0008633886463940144, + "loss_iou": 0.50390625, + "loss_num": 0.01806640625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 233976692, + "step": 1852 + }, + { + "epoch": 0.47534149939075226, + "grad_norm": 60.9434700012207, + "learning_rate": 5e-06, + "loss": 1.0962, + "num_input_tokens_seen": 234104036, + "step": 1853 + }, + { + "epoch": 0.47534149939075226, + "loss": 0.759556233882904, + "loss_ce": 0.0002788786659948528, + "loss_iou": 0.36328125, + "loss_num": 0.0064697265625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 234104036, + "step": 1853 + }, + { + "epoch": 0.47559802475469765, + "grad_norm": 79.00504302978516, + "learning_rate": 5e-06, + "loss": 1.0278, + "num_input_tokens_seen": 234231608, + "step": 1854 + }, + { + "epoch": 0.47559802475469765, + "loss": 0.9622482657432556, + "loss_ce": 0.0008225020137615502, + "loss_iou": 0.462890625, + "loss_num": 0.0072021484375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 234231608, + "step": 1854 + }, + { + "epoch": 0.47585455011864297, + "grad_norm": 52.6122932434082, + "learning_rate": 5e-06, + "loss": 1.2521, + "num_input_tokens_seen": 234358460, + "step": 1855 + }, + { + "epoch": 0.47585455011864297, + "loss": 1.176763892173767, + "loss_ce": 0.00293576717376709, + "loss_iou": 0.546875, + "loss_num": 0.0166015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 234358460, + "step": 1855 + }, + { + "epoch": 0.47611107548258835, + "grad_norm": 38.009159088134766, + "learning_rate": 5e-06, + "loss": 0.9668, + "num_input_tokens_seen": 234483944, + "step": 1856 + }, + { + "epoch": 0.47611107548258835, + "loss": 0.9314765930175781, + "loss_ce": 0.0032539386302232742, + "loss_iou": 0.44140625, + "loss_num": 0.00927734375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 234483944, + "step": 1856 + }, + { + "epoch": 0.4763676008465337, + "grad_norm": 22.449817657470703, + "learning_rate": 5e-06, + "loss": 1.0164, + "num_input_tokens_seen": 234609360, + "step": 1857 + }, + { + "epoch": 0.4763676008465337, + "loss": 1.0683443546295166, + "loss_ce": 0.0024264566600322723, + "loss_iou": 0.48046875, + "loss_num": 0.021240234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 234609360, + "step": 1857 + }, + { + "epoch": 0.47662412621047906, + "grad_norm": 27.350948333740234, + "learning_rate": 5e-06, + "loss": 0.9773, + "num_input_tokens_seen": 234735864, + "step": 1858 + }, + { + "epoch": 0.47662412621047906, + "loss": 0.936783492565155, + "loss_ce": 0.001236609765328467, + "loss_iou": 0.435546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 234735864, + "step": 1858 + }, + { + "epoch": 0.47688065157442444, + "grad_norm": 44.44437026977539, + "learning_rate": 5e-06, + "loss": 1.0785, + "num_input_tokens_seen": 234862092, + "step": 1859 + }, + { + "epoch": 0.47688065157442444, + "loss": 1.0489864349365234, + "loss_ce": 0.00015830481424927711, + "loss_iou": 0.5, + "loss_num": 0.009765625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 234862092, + "step": 1859 + }, + { + "epoch": 0.47713717693836977, + "grad_norm": 56.8334846496582, + "learning_rate": 5e-06, + "loss": 0.992, + "num_input_tokens_seen": 234988180, + "step": 1860 + }, + { + "epoch": 0.47713717693836977, + "loss": 0.7944924235343933, + "loss_ce": 0.0015236863400787115, + "loss_iou": 0.37890625, + "loss_num": 0.00653076171875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 234988180, + "step": 1860 + }, + { + "epoch": 0.47739370230231515, + "grad_norm": 53.719661712646484, + "learning_rate": 5e-06, + "loss": 1.0994, + "num_input_tokens_seen": 235114612, + "step": 1861 + }, + { + "epoch": 0.47739370230231515, + "loss": 1.049461007118225, + "loss_ce": 0.003074211999773979, + "loss_iou": 0.4765625, + "loss_num": 0.0184326171875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 235114612, + "step": 1861 + }, + { + "epoch": 0.4776502276662605, + "grad_norm": 53.761531829833984, + "learning_rate": 5e-06, + "loss": 1.1625, + "num_input_tokens_seen": 235241120, + "step": 1862 + }, + { + "epoch": 0.4776502276662605, + "loss": 1.1055303812026978, + "loss_ce": 0.000549982360098511, + "loss_iou": 0.5234375, + "loss_num": 0.01226806640625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 235241120, + "step": 1862 + }, + { + "epoch": 0.47790675303020586, + "grad_norm": 44.110374450683594, + "learning_rate": 5e-06, + "loss": 1.0769, + "num_input_tokens_seen": 235366052, + "step": 1863 + }, + { + "epoch": 0.47790675303020586, + "loss": 1.1106672286987305, + "loss_ce": 0.002268758602440357, + "loss_iou": 0.50390625, + "loss_num": 0.019775390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 235366052, + "step": 1863 + }, + { + "epoch": 0.47816327839415124, + "grad_norm": 82.33657836914062, + "learning_rate": 5e-06, + "loss": 1.1181, + "num_input_tokens_seen": 235493580, + "step": 1864 + }, + { + "epoch": 0.47816327839415124, + "loss": 1.0870929956436157, + "loss_ce": 0.0026203382294625044, + "loss_iou": 0.498046875, + "loss_num": 0.0179443359375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 235493580, + "step": 1864 + }, + { + "epoch": 0.47841980375809656, + "grad_norm": 55.99766159057617, + "learning_rate": 5e-06, + "loss": 1.2019, + "num_input_tokens_seen": 235620068, + "step": 1865 + }, + { + "epoch": 0.47841980375809656, + "loss": 1.2227039337158203, + "loss_ce": 0.0005359695060178638, + "loss_iou": 0.55859375, + "loss_num": 0.0205078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 235620068, + "step": 1865 + }, + { + "epoch": 0.47867632912204194, + "grad_norm": 35.03327941894531, + "learning_rate": 5e-06, + "loss": 0.965, + "num_input_tokens_seen": 235746940, + "step": 1866 + }, + { + "epoch": 0.47867632912204194, + "loss": 0.976115882396698, + "loss_ce": 0.0005299156182445586, + "loss_iou": 0.45703125, + "loss_num": 0.0123291015625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 235746940, + "step": 1866 + }, + { + "epoch": 0.4789328544859873, + "grad_norm": 55.970909118652344, + "learning_rate": 5e-06, + "loss": 1.1718, + "num_input_tokens_seen": 235871804, + "step": 1867 + }, + { + "epoch": 0.4789328544859873, + "loss": 1.2108590602874756, + "loss_ce": 0.005292641930282116, + "loss_iou": 0.53125, + "loss_num": 0.028076171875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 235871804, + "step": 1867 + }, + { + "epoch": 0.47918937984993265, + "grad_norm": 53.1323127746582, + "learning_rate": 5e-06, + "loss": 1.1684, + "num_input_tokens_seen": 235998332, + "step": 1868 + }, + { + "epoch": 0.47918937984993265, + "loss": 1.0838267803192139, + "loss_ce": 0.00033065187744796276, + "loss_iou": 0.51171875, + "loss_num": 0.01251220703125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 235998332, + "step": 1868 + }, + { + "epoch": 0.47944590521387803, + "grad_norm": 59.278907775878906, + "learning_rate": 5e-06, + "loss": 1.1133, + "num_input_tokens_seen": 236124304, + "step": 1869 + }, + { + "epoch": 0.47944590521387803, + "loss": 1.0977363586425781, + "loss_ce": 0.0015449493657797575, + "loss_iou": 0.515625, + "loss_num": 0.01214599609375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 236124304, + "step": 1869 + }, + { + "epoch": 0.47970243057782336, + "grad_norm": 60.19270324707031, + "learning_rate": 5e-06, + "loss": 1.0155, + "num_input_tokens_seen": 236250676, + "step": 1870 + }, + { + "epoch": 0.47970243057782336, + "loss": 0.9597852230072021, + "loss_ce": 0.00031253372435458004, + "loss_iou": 0.453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 236250676, + "step": 1870 + }, + { + "epoch": 0.47995895594176874, + "grad_norm": 64.63227081298828, + "learning_rate": 5e-06, + "loss": 0.9604, + "num_input_tokens_seen": 236377044, + "step": 1871 + }, + { + "epoch": 0.47995895594176874, + "loss": 0.976029634475708, + "loss_ce": 0.001908586942590773, + "loss_iou": 0.4609375, + "loss_num": 0.01092529296875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 236377044, + "step": 1871 + }, + { + "epoch": 0.4802154813057141, + "grad_norm": 51.903472900390625, + "learning_rate": 5e-06, + "loss": 1.2089, + "num_input_tokens_seen": 236503152, + "step": 1872 + }, + { + "epoch": 0.4802154813057141, + "loss": 1.1109833717346191, + "loss_ce": 0.001120112370699644, + "loss_iou": 0.51171875, + "loss_num": 0.017578125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 236503152, + "step": 1872 + }, + { + "epoch": 0.48047200666965945, + "grad_norm": 25.02709197998047, + "learning_rate": 5e-06, + "loss": 1.1047, + "num_input_tokens_seen": 236629256, + "step": 1873 + }, + { + "epoch": 0.48047200666965945, + "loss": 1.372812271118164, + "loss_ce": 0.002206725999712944, + "loss_iou": 0.59375, + "loss_num": 0.0361328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 236629256, + "step": 1873 + }, + { + "epoch": 0.4807285320336048, + "grad_norm": 48.01439666748047, + "learning_rate": 5e-06, + "loss": 1.068, + "num_input_tokens_seen": 236756556, + "step": 1874 + }, + { + "epoch": 0.4807285320336048, + "loss": 0.8949036002159119, + "loss_ce": 0.0015931021189317107, + "loss_iou": 0.40625, + "loss_num": 0.0164794921875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 236756556, + "step": 1874 + }, + { + "epoch": 0.4809850573975502, + "grad_norm": 71.54195404052734, + "learning_rate": 5e-06, + "loss": 1.2173, + "num_input_tokens_seen": 236882456, + "step": 1875 + }, + { + "epoch": 0.4809850573975502, + "loss": 1.2417643070220947, + "loss_ce": 0.0005534248193725944, + "loss_iou": 0.57421875, + "loss_num": 0.0186767578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 236882456, + "step": 1875 + }, + { + "epoch": 0.48124158276149553, + "grad_norm": 52.565513610839844, + "learning_rate": 5e-06, + "loss": 1.0693, + "num_input_tokens_seen": 237009136, + "step": 1876 + }, + { + "epoch": 0.48124158276149553, + "loss": 1.1341297626495361, + "loss_ce": 0.0008288930985145271, + "loss_iou": 0.5234375, + "loss_num": 0.0167236328125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 237009136, + "step": 1876 + }, + { + "epoch": 0.4814981081254409, + "grad_norm": 60.84006881713867, + "learning_rate": 5e-06, + "loss": 1.0085, + "num_input_tokens_seen": 237134704, + "step": 1877 + }, + { + "epoch": 0.4814981081254409, + "loss": 1.101927638053894, + "loss_ce": 0.001341680996119976, + "loss_iou": 0.51953125, + "loss_num": 0.01239013671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 237134704, + "step": 1877 + }, + { + "epoch": 0.48175463348938624, + "grad_norm": 70.73104095458984, + "learning_rate": 5e-06, + "loss": 1.1226, + "num_input_tokens_seen": 237261744, + "step": 1878 + }, + { + "epoch": 0.48175463348938624, + "loss": 1.0556471347808838, + "loss_ce": 0.0009597218595445156, + "loss_iou": 0.48046875, + "loss_num": 0.0186767578125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 237261744, + "step": 1878 + }, + { + "epoch": 0.4820111588533316, + "grad_norm": 139.2970428466797, + "learning_rate": 5e-06, + "loss": 1.118, + "num_input_tokens_seen": 237387336, + "step": 1879 + }, + { + "epoch": 0.4820111588533316, + "loss": 0.9689565896987915, + "loss_ce": 0.0011831226292997599, + "loss_iou": 0.4453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 237387336, + "step": 1879 + }, + { + "epoch": 0.482267684217277, + "grad_norm": 58.858097076416016, + "learning_rate": 5e-06, + "loss": 1.0366, + "num_input_tokens_seen": 237514116, + "step": 1880 + }, + { + "epoch": 0.482267684217277, + "loss": 1.0484997034072876, + "loss_ce": 0.0011364114470779896, + "loss_iou": 0.498046875, + "loss_num": 0.01055908203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 237514116, + "step": 1880 + }, + { + "epoch": 0.48252420958122233, + "grad_norm": 66.9983139038086, + "learning_rate": 5e-06, + "loss": 1.0154, + "num_input_tokens_seen": 237640164, + "step": 1881 + }, + { + "epoch": 0.48252420958122233, + "loss": 1.135200023651123, + "loss_ce": 0.004340624436736107, + "loss_iou": 0.51171875, + "loss_num": 0.0213623046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 237640164, + "step": 1881 + }, + { + "epoch": 0.4827807349451677, + "grad_norm": 59.058406829833984, + "learning_rate": 5e-06, + "loss": 1.0876, + "num_input_tokens_seen": 237767264, + "step": 1882 + }, + { + "epoch": 0.4827807349451677, + "loss": 1.0047439336776733, + "loss_ce": 0.00034939180477522314, + "loss_iou": 0.462890625, + "loss_num": 0.01611328125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 237767264, + "step": 1882 + }, + { + "epoch": 0.48303726030911304, + "grad_norm": 52.37096405029297, + "learning_rate": 5e-06, + "loss": 1.0031, + "num_input_tokens_seen": 237893136, + "step": 1883 + }, + { + "epoch": 0.48303726030911304, + "loss": 0.8908854722976685, + "loss_ce": 0.0007487627444788814, + "loss_iou": 0.416015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 237893136, + "step": 1883 + }, + { + "epoch": 0.4832937856730584, + "grad_norm": 38.4356803894043, + "learning_rate": 5e-06, + "loss": 1.0872, + "num_input_tokens_seen": 238019616, + "step": 1884 + }, + { + "epoch": 0.4832937856730584, + "loss": 0.9420380592346191, + "loss_ce": 0.00014353601727634668, + "loss_iou": 0.4453125, + "loss_num": 0.00982666015625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 238019616, + "step": 1884 + }, + { + "epoch": 0.4835503110370038, + "grad_norm": 29.950969696044922, + "learning_rate": 5e-06, + "loss": 1.0012, + "num_input_tokens_seen": 238146228, + "step": 1885 + }, + { + "epoch": 0.4835503110370038, + "loss": 1.106387972831726, + "loss_ce": 0.0023840484209358692, + "loss_iou": 0.5, + "loss_num": 0.0203857421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 238146228, + "step": 1885 + }, + { + "epoch": 0.4838068364009491, + "grad_norm": 30.36648941040039, + "learning_rate": 5e-06, + "loss": 1.0, + "num_input_tokens_seen": 238273048, + "step": 1886 + }, + { + "epoch": 0.4838068364009491, + "loss": 1.0306766033172607, + "loss_ce": 0.0008914822246879339, + "loss_iou": 0.4765625, + "loss_num": 0.0155029296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 238273048, + "step": 1886 + }, + { + "epoch": 0.4840633617648945, + "grad_norm": 32.554683685302734, + "learning_rate": 5e-06, + "loss": 0.96, + "num_input_tokens_seen": 238399432, + "step": 1887 + }, + { + "epoch": 0.4840633617648945, + "loss": 0.997307538986206, + "loss_ce": 0.0002372346498304978, + "loss_iou": 0.451171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 238399432, + "step": 1887 + }, + { + "epoch": 0.4843198871288399, + "grad_norm": 46.41300964355469, + "learning_rate": 5e-06, + "loss": 1.038, + "num_input_tokens_seen": 238526732, + "step": 1888 + }, + { + "epoch": 0.4843198871288399, + "loss": 0.9699275493621826, + "loss_ce": 0.000689274980686605, + "loss_iou": 0.458984375, + "loss_num": 0.01043701171875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 238526732, + "step": 1888 + }, + { + "epoch": 0.4845764124927852, + "grad_norm": 47.23472213745117, + "learning_rate": 5e-06, + "loss": 1.1663, + "num_input_tokens_seen": 238652980, + "step": 1889 + }, + { + "epoch": 0.4845764124927852, + "loss": 1.067474603652954, + "loss_ce": 0.0005800873041152954, + "loss_iou": 0.50390625, + "loss_num": 0.01123046875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 238652980, + "step": 1889 + }, + { + "epoch": 0.4848329378567306, + "grad_norm": 46.800758361816406, + "learning_rate": 5e-06, + "loss": 1.0193, + "num_input_tokens_seen": 238780120, + "step": 1890 + }, + { + "epoch": 0.4848329378567306, + "loss": 1.0568170547485352, + "loss_ce": 0.0060357749462127686, + "loss_iou": 0.455078125, + "loss_num": 0.02783203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 238780120, + "step": 1890 + }, + { + "epoch": 0.4850894632206759, + "grad_norm": 168.80784606933594, + "learning_rate": 5e-06, + "loss": 1.1761, + "num_input_tokens_seen": 238906264, + "step": 1891 + }, + { + "epoch": 0.4850894632206759, + "loss": 1.2922592163085938, + "loss_ce": 0.0007554080802947283, + "loss_iou": 0.57421875, + "loss_num": 0.029052734375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 238906264, + "step": 1891 + }, + { + "epoch": 0.4853459885846213, + "grad_norm": 48.32796859741211, + "learning_rate": 5e-06, + "loss": 1.0965, + "num_input_tokens_seen": 239032492, + "step": 1892 + }, + { + "epoch": 0.4853459885846213, + "loss": 0.9718291759490967, + "loss_ce": 0.001614385168068111, + "loss_iou": 0.4453125, + "loss_num": 0.01556396484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 239032492, + "step": 1892 + }, + { + "epoch": 0.4856025139485667, + "grad_norm": 61.12211608886719, + "learning_rate": 5e-06, + "loss": 1.2033, + "num_input_tokens_seen": 239160696, + "step": 1893 + }, + { + "epoch": 0.4856025139485667, + "loss": 1.3243489265441895, + "loss_ce": 0.0011068286839872599, + "loss_iou": 0.6015625, + "loss_num": 0.0235595703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 239160696, + "step": 1893 + }, + { + "epoch": 0.485859039312512, + "grad_norm": 45.77940368652344, + "learning_rate": 5e-06, + "loss": 1.1886, + "num_input_tokens_seen": 239287788, + "step": 1894 + }, + { + "epoch": 0.485859039312512, + "loss": 1.2442501783370972, + "loss_ce": 0.002550952835008502, + "loss_iou": 0.53515625, + "loss_num": 0.0341796875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 239287788, + "step": 1894 + }, + { + "epoch": 0.4861155646764574, + "grad_norm": 51.95181655883789, + "learning_rate": 5e-06, + "loss": 1.0302, + "num_input_tokens_seen": 239413596, + "step": 1895 + }, + { + "epoch": 0.4861155646764574, + "loss": 0.7887698411941528, + "loss_ce": 0.00019564021204132587, + "loss_iou": 0.373046875, + "loss_num": 0.0087890625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 239413596, + "step": 1895 + }, + { + "epoch": 0.48637209004040277, + "grad_norm": 43.61397933959961, + "learning_rate": 5e-06, + "loss": 1.007, + "num_input_tokens_seen": 239540156, + "step": 1896 + }, + { + "epoch": 0.48637209004040277, + "loss": 1.2286834716796875, + "loss_ce": 0.0030975178815424442, + "loss_iou": 0.546875, + "loss_num": 0.02685546875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 239540156, + "step": 1896 + }, + { + "epoch": 0.4866286154043481, + "grad_norm": 52.75824737548828, + "learning_rate": 5e-06, + "loss": 1.1015, + "num_input_tokens_seen": 239666032, + "step": 1897 + }, + { + "epoch": 0.4866286154043481, + "loss": 1.1054308414459229, + "loss_ce": 0.002403511665761471, + "loss_iou": 0.50390625, + "loss_num": 0.019287109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 239666032, + "step": 1897 + }, + { + "epoch": 0.4868851407682935, + "grad_norm": 57.324928283691406, + "learning_rate": 5e-06, + "loss": 1.062, + "num_input_tokens_seen": 239792308, + "step": 1898 + }, + { + "epoch": 0.4868851407682935, + "loss": 1.2887264490127563, + "loss_ce": 0.0006405143649317324, + "loss_iou": 0.5859375, + "loss_num": 0.0238037109375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 239792308, + "step": 1898 + }, + { + "epoch": 0.4871416661322388, + "grad_norm": 77.41126251220703, + "learning_rate": 5e-06, + "loss": 0.9484, + "num_input_tokens_seen": 239916732, + "step": 1899 + }, + { + "epoch": 0.4871416661322388, + "loss": 0.9497479200363159, + "loss_ce": 0.001017415663227439, + "loss_iou": 0.4453125, + "loss_num": 0.01214599609375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 239916732, + "step": 1899 + }, + { + "epoch": 0.4873981914961842, + "grad_norm": 58.17295837402344, + "learning_rate": 5e-06, + "loss": 1.0024, + "num_input_tokens_seen": 240045076, + "step": 1900 + }, + { + "epoch": 0.4873981914961842, + "loss": 1.1280183792114258, + "loss_ce": 8.872879698174074e-05, + "loss_iou": 0.5234375, + "loss_num": 0.015380859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 240045076, + "step": 1900 + }, + { + "epoch": 0.48765471686012957, + "grad_norm": 72.91365814208984, + "learning_rate": 5e-06, + "loss": 1.1073, + "num_input_tokens_seen": 240172200, + "step": 1901 + }, + { + "epoch": 0.48765471686012957, + "loss": 1.1327826976776123, + "loss_ce": 0.0009467414347454906, + "loss_iou": 0.51953125, + "loss_num": 0.017822265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 240172200, + "step": 1901 + }, + { + "epoch": 0.4879112422240749, + "grad_norm": 75.48278045654297, + "learning_rate": 5e-06, + "loss": 1.1224, + "num_input_tokens_seen": 240299100, + "step": 1902 + }, + { + "epoch": 0.4879112422240749, + "loss": 1.176438808441162, + "loss_ce": 0.0021223644725978374, + "loss_iou": 0.52734375, + "loss_num": 0.023681640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 240299100, + "step": 1902 + }, + { + "epoch": 0.4881677675880203, + "grad_norm": 58.2747917175293, + "learning_rate": 5e-06, + "loss": 1.0731, + "num_input_tokens_seen": 240425316, + "step": 1903 + }, + { + "epoch": 0.4881677675880203, + "loss": 1.0705647468566895, + "loss_ce": 0.0017169974744319916, + "loss_iou": 0.490234375, + "loss_num": 0.017822265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 240425316, + "step": 1903 + }, + { + "epoch": 0.4884242929519656, + "grad_norm": 60.074546813964844, + "learning_rate": 5e-06, + "loss": 1.1667, + "num_input_tokens_seen": 240551224, + "step": 1904 + }, + { + "epoch": 0.4884242929519656, + "loss": 1.3216359615325928, + "loss_ce": 0.0037648866418749094, + "loss_iou": 0.609375, + "loss_num": 0.0191650390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 240551224, + "step": 1904 + }, + { + "epoch": 0.488680818315911, + "grad_norm": 52.134796142578125, + "learning_rate": 5e-06, + "loss": 1.108, + "num_input_tokens_seen": 240677508, + "step": 1905 + }, + { + "epoch": 0.488680818315911, + "loss": 1.3853974342346191, + "loss_ce": 0.003073247615247965, + "loss_iou": 0.609375, + "loss_num": 0.0322265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 240677508, + "step": 1905 + }, + { + "epoch": 0.48893734367985636, + "grad_norm": 50.12730026245117, + "learning_rate": 5e-06, + "loss": 1.1389, + "num_input_tokens_seen": 240803884, + "step": 1906 + }, + { + "epoch": 0.48893734367985636, + "loss": 1.1074507236480713, + "loss_ce": 0.001005438156425953, + "loss_iou": 0.5078125, + "loss_num": 0.0177001953125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 240803884, + "step": 1906 + }, + { + "epoch": 0.4891938690438017, + "grad_norm": 51.152069091796875, + "learning_rate": 5e-06, + "loss": 0.9208, + "num_input_tokens_seen": 240929660, + "step": 1907 + }, + { + "epoch": 0.4891938690438017, + "loss": 0.9798082113265991, + "loss_ce": 0.0027573721017688513, + "loss_iou": 0.4453125, + "loss_num": 0.017333984375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 240929660, + "step": 1907 + }, + { + "epoch": 0.48945039440774707, + "grad_norm": 66.84367370605469, + "learning_rate": 5e-06, + "loss": 0.9741, + "num_input_tokens_seen": 241057232, + "step": 1908 + }, + { + "epoch": 0.48945039440774707, + "loss": 0.9712358117103577, + "loss_ce": 0.0005326881073415279, + "loss_iou": 0.462890625, + "loss_num": 0.0086669921875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 241057232, + "step": 1908 + }, + { + "epoch": 0.48970691977169245, + "grad_norm": 68.10382843017578, + "learning_rate": 5e-06, + "loss": 1.1106, + "num_input_tokens_seen": 241184000, + "step": 1909 + }, + { + "epoch": 0.48970691977169245, + "loss": 1.1017725467681885, + "loss_ce": 0.0006984078790992498, + "loss_iou": 0.5078125, + "loss_num": 0.0174560546875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 241184000, + "step": 1909 + }, + { + "epoch": 0.4899634451356378, + "grad_norm": 63.288082122802734, + "learning_rate": 5e-06, + "loss": 1.0852, + "num_input_tokens_seen": 241310628, + "step": 1910 + }, + { + "epoch": 0.4899634451356378, + "loss": 0.9879711866378784, + "loss_ce": 0.0031079333275556564, + "loss_iou": 0.44921875, + "loss_num": 0.017333984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 241310628, + "step": 1910 + }, + { + "epoch": 0.49021997049958316, + "grad_norm": 57.59235382080078, + "learning_rate": 5e-06, + "loss": 1.0478, + "num_input_tokens_seen": 241437444, + "step": 1911 + }, + { + "epoch": 0.49021997049958316, + "loss": 1.233176827430725, + "loss_ce": 0.003684642491862178, + "loss_iou": 0.56640625, + "loss_num": 0.018798828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 241437444, + "step": 1911 + }, + { + "epoch": 0.4904764958635285, + "grad_norm": 45.789493560791016, + "learning_rate": 5e-06, + "loss": 0.967, + "num_input_tokens_seen": 241563168, + "step": 1912 + }, + { + "epoch": 0.4904764958635285, + "loss": 0.9186408519744873, + "loss_ce": 0.0011604165192693472, + "loss_iou": 0.43359375, + "loss_num": 0.01025390625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 241563168, + "step": 1912 + }, + { + "epoch": 0.49073302122747386, + "grad_norm": 19.271467208862305, + "learning_rate": 5e-06, + "loss": 0.986, + "num_input_tokens_seen": 241689256, + "step": 1913 + }, + { + "epoch": 0.49073302122747386, + "loss": 0.9275004863739014, + "loss_ce": 0.000254409562330693, + "loss_iou": 0.4453125, + "loss_num": 0.00726318359375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 241689256, + "step": 1913 + }, + { + "epoch": 0.49098954659141925, + "grad_norm": 38.08987045288086, + "learning_rate": 5e-06, + "loss": 1.0499, + "num_input_tokens_seen": 241815580, + "step": 1914 + }, + { + "epoch": 0.49098954659141925, + "loss": 1.1820391416549683, + "loss_ce": 0.00039859546814113855, + "loss_iou": 0.53125, + "loss_num": 0.02392578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 241815580, + "step": 1914 + }, + { + "epoch": 0.49124607195536457, + "grad_norm": 38.861270904541016, + "learning_rate": 5e-06, + "loss": 0.9547, + "num_input_tokens_seen": 241942024, + "step": 1915 + }, + { + "epoch": 0.49124607195536457, + "loss": 0.9469727873802185, + "loss_ce": 0.0006837384426034987, + "loss_iou": 0.44140625, + "loss_num": 0.0126953125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 241942024, + "step": 1915 + }, + { + "epoch": 0.49150259731930995, + "grad_norm": 51.583961486816406, + "learning_rate": 5e-06, + "loss": 1.0277, + "num_input_tokens_seen": 242068244, + "step": 1916 + }, + { + "epoch": 0.49150259731930995, + "loss": 0.9519945979118347, + "loss_ce": 0.001310988562181592, + "loss_iou": 0.43359375, + "loss_num": 0.0169677734375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 242068244, + "step": 1916 + }, + { + "epoch": 0.49175912268325533, + "grad_norm": 49.44455337524414, + "learning_rate": 5e-06, + "loss": 1.0902, + "num_input_tokens_seen": 242195124, + "step": 1917 + }, + { + "epoch": 0.49175912268325533, + "loss": 1.2363417148590088, + "loss_ce": 0.0014783935621380806, + "loss_iou": 0.5546875, + "loss_num": 0.025390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 242195124, + "step": 1917 + }, + { + "epoch": 0.49201564804720066, + "grad_norm": 61.73346710205078, + "learning_rate": 5e-06, + "loss": 1.0024, + "num_input_tokens_seen": 242322144, + "step": 1918 + }, + { + "epoch": 0.49201564804720066, + "loss": 1.0276238918304443, + "loss_ce": 0.002721537835896015, + "loss_iou": 0.48046875, + "loss_num": 0.01239013671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 242322144, + "step": 1918 + }, + { + "epoch": 0.49227217341114604, + "grad_norm": 54.37204360961914, + "learning_rate": 5e-06, + "loss": 1.0893, + "num_input_tokens_seen": 242449096, + "step": 1919 + }, + { + "epoch": 0.49227217341114604, + "loss": 1.2892987728118896, + "loss_ce": 0.0021894387900829315, + "loss_iou": 0.59375, + "loss_num": 0.019775390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 242449096, + "step": 1919 + }, + { + "epoch": 0.49252869877509137, + "grad_norm": 39.87238311767578, + "learning_rate": 5e-06, + "loss": 1.0461, + "num_input_tokens_seen": 242576124, + "step": 1920 + }, + { + "epoch": 0.49252869877509137, + "loss": 1.0162527561187744, + "loss_ce": 0.0016043331706896424, + "loss_iou": 0.474609375, + "loss_num": 0.0130615234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 242576124, + "step": 1920 + }, + { + "epoch": 0.49278522413903675, + "grad_norm": 55.316551208496094, + "learning_rate": 5e-06, + "loss": 0.9979, + "num_input_tokens_seen": 242702372, + "step": 1921 + }, + { + "epoch": 0.49278522413903675, + "loss": 1.0177385807037354, + "loss_ce": 0.0011371177388355136, + "loss_iou": 0.4765625, + "loss_num": 0.0130615234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 242702372, + "step": 1921 + }, + { + "epoch": 0.49304174950298213, + "grad_norm": 79.78050231933594, + "learning_rate": 5e-06, + "loss": 1.1642, + "num_input_tokens_seen": 242827564, + "step": 1922 + }, + { + "epoch": 0.49304174950298213, + "loss": 1.2796180248260498, + "loss_ce": 0.0008093510987237096, + "loss_iou": 0.5625, + "loss_num": 0.0302734375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 242827564, + "step": 1922 + }, + { + "epoch": 0.49329827486692746, + "grad_norm": 54.527462005615234, + "learning_rate": 5e-06, + "loss": 1.0974, + "num_input_tokens_seen": 242953688, + "step": 1923 + }, + { + "epoch": 0.49329827486692746, + "loss": 1.0572283267974854, + "loss_ce": 9.932818647939712e-05, + "loss_iou": 0.50390625, + "loss_num": 0.01055908203125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 242953688, + "step": 1923 + }, + { + "epoch": 0.49355480023087284, + "grad_norm": 54.472049713134766, + "learning_rate": 5e-06, + "loss": 0.9799, + "num_input_tokens_seen": 243079736, + "step": 1924 + }, + { + "epoch": 0.49355480023087284, + "loss": 1.0056657791137695, + "loss_ce": 0.0002946704626083374, + "loss_iou": 0.46484375, + "loss_num": 0.01458740234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 243079736, + "step": 1924 + }, + { + "epoch": 0.49381132559481816, + "grad_norm": 58.619808197021484, + "learning_rate": 5e-06, + "loss": 1.1223, + "num_input_tokens_seen": 243206792, + "step": 1925 + }, + { + "epoch": 0.49381132559481816, + "loss": 1.0290852785110474, + "loss_ce": 0.0002766991383396089, + "loss_iou": 0.4765625, + "loss_num": 0.0146484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 243206792, + "step": 1925 + }, + { + "epoch": 0.49406785095876354, + "grad_norm": 60.52656555175781, + "learning_rate": 5e-06, + "loss": 1.116, + "num_input_tokens_seen": 243332240, + "step": 1926 + }, + { + "epoch": 0.49406785095876354, + "loss": 1.2176557779312134, + "loss_ce": 0.0003705949930008501, + "loss_iou": 0.546875, + "loss_num": 0.024658203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 243332240, + "step": 1926 + }, + { + "epoch": 0.4943243763227089, + "grad_norm": 54.53802490234375, + "learning_rate": 5e-06, + "loss": 0.9904, + "num_input_tokens_seen": 243458792, + "step": 1927 + }, + { + "epoch": 0.4943243763227089, + "loss": 1.0238008499145508, + "loss_ce": 0.0013399901799857616, + "loss_iou": 0.48828125, + "loss_num": 0.00933837890625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 243458792, + "step": 1927 + }, + { + "epoch": 0.49458090168665425, + "grad_norm": 64.47735595703125, + "learning_rate": 5e-06, + "loss": 1.1346, + "num_input_tokens_seen": 243584560, + "step": 1928 + }, + { + "epoch": 0.49458090168665425, + "loss": 1.1117148399353027, + "loss_ce": 0.0013632903574034572, + "loss_iou": 0.51171875, + "loss_num": 0.017822265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 243584560, + "step": 1928 + }, + { + "epoch": 0.49483742705059963, + "grad_norm": 60.638912200927734, + "learning_rate": 5e-06, + "loss": 1.1502, + "num_input_tokens_seen": 243712372, + "step": 1929 + }, + { + "epoch": 0.49483742705059963, + "loss": 1.1220852136611938, + "loss_ce": 0.000991495093330741, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 243712372, + "step": 1929 + }, + { + "epoch": 0.495093952414545, + "grad_norm": 61.18042755126953, + "learning_rate": 5e-06, + "loss": 1.0612, + "num_input_tokens_seen": 243837876, + "step": 1930 + }, + { + "epoch": 0.495093952414545, + "loss": 1.0717285871505737, + "loss_ce": 0.0019043156644329429, + "loss_iou": 0.4921875, + "loss_num": 0.0167236328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 243837876, + "step": 1930 + }, + { + "epoch": 0.49535047777849034, + "grad_norm": 70.23001098632812, + "learning_rate": 5e-06, + "loss": 1.2237, + "num_input_tokens_seen": 243964604, + "step": 1931 + }, + { + "epoch": 0.49535047777849034, + "loss": 1.2345119714736938, + "loss_ce": 0.001601781346835196, + "loss_iou": 0.58203125, + "loss_num": 0.013671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 243964604, + "step": 1931 + }, + { + "epoch": 0.4956070031424357, + "grad_norm": 60.20863723754883, + "learning_rate": 5e-06, + "loss": 1.0879, + "num_input_tokens_seen": 244092140, + "step": 1932 + }, + { + "epoch": 0.4956070031424357, + "loss": 0.8927797079086304, + "loss_ce": 0.00020157046674285084, + "loss_iou": 0.419921875, + "loss_num": 0.0106201171875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 244092140, + "step": 1932 + }, + { + "epoch": 0.49586352850638105, + "grad_norm": 52.12117385864258, + "learning_rate": 5e-06, + "loss": 1.081, + "num_input_tokens_seen": 244217368, + "step": 1933 + }, + { + "epoch": 0.49586352850638105, + "loss": 1.0752372741699219, + "loss_ce": 0.0019950554706156254, + "loss_iou": 0.4921875, + "loss_num": 0.017822265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 244217368, + "step": 1933 + }, + { + "epoch": 0.4961200538703264, + "grad_norm": 46.27729415893555, + "learning_rate": 5e-06, + "loss": 1.1122, + "num_input_tokens_seen": 244342860, + "step": 1934 + }, + { + "epoch": 0.4961200538703264, + "loss": 1.1768549680709839, + "loss_ce": 0.004491744097322226, + "loss_iou": 0.5234375, + "loss_num": 0.0257568359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 244342860, + "step": 1934 + }, + { + "epoch": 0.4963765792342718, + "grad_norm": 59.024173736572266, + "learning_rate": 5e-06, + "loss": 1.0329, + "num_input_tokens_seen": 244469344, + "step": 1935 + }, + { + "epoch": 0.4963765792342718, + "loss": 0.9137611389160156, + "loss_ce": 0.0016517710173502564, + "loss_iou": 0.423828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 244469344, + "step": 1935 + }, + { + "epoch": 0.49663310459821713, + "grad_norm": 91.14009094238281, + "learning_rate": 5e-06, + "loss": 1.1798, + "num_input_tokens_seen": 244596636, + "step": 1936 + }, + { + "epoch": 0.49663310459821713, + "loss": 1.2700226306915283, + "loss_ce": 0.0004914518794976175, + "loss_iou": 0.5703125, + "loss_num": 0.0263671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 244596636, + "step": 1936 + }, + { + "epoch": 0.4968896299621625, + "grad_norm": 56.09843826293945, + "learning_rate": 5e-06, + "loss": 1.1193, + "num_input_tokens_seen": 244724028, + "step": 1937 + }, + { + "epoch": 0.4968896299621625, + "loss": 1.201474905014038, + "loss_ce": 0.0012796757509931922, + "loss_iou": 0.55859375, + "loss_num": 0.01611328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 244724028, + "step": 1937 + }, + { + "epoch": 0.49714615532610784, + "grad_norm": 29.887001037597656, + "learning_rate": 5e-06, + "loss": 1.0349, + "num_input_tokens_seen": 244850608, + "step": 1938 + }, + { + "epoch": 0.49714615532610784, + "loss": 1.0147888660430908, + "loss_ce": 0.005511517636477947, + "loss_iou": 0.466796875, + "loss_num": 0.01519775390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 244850608, + "step": 1938 + }, + { + "epoch": 0.4974026806900532, + "grad_norm": 37.54344940185547, + "learning_rate": 5e-06, + "loss": 1.2049, + "num_input_tokens_seen": 244976636, + "step": 1939 + }, + { + "epoch": 0.4974026806900532, + "loss": 1.0757300853729248, + "loss_ce": 0.0019996582996100187, + "loss_iou": 0.478515625, + "loss_num": 0.0234375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 244976636, + "step": 1939 + }, + { + "epoch": 0.4976592060539986, + "grad_norm": 50.91771697998047, + "learning_rate": 5e-06, + "loss": 1.1476, + "num_input_tokens_seen": 245103236, + "step": 1940 + }, + { + "epoch": 0.4976592060539986, + "loss": 0.9971590638160706, + "loss_ce": 0.0005770362331531942, + "loss_iou": 0.46484375, + "loss_num": 0.013427734375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 245103236, + "step": 1940 + }, + { + "epoch": 0.49791573141794393, + "grad_norm": 41.448246002197266, + "learning_rate": 5e-06, + "loss": 0.9929, + "num_input_tokens_seen": 245230184, + "step": 1941 + }, + { + "epoch": 0.49791573141794393, + "loss": 0.9098992347717285, + "loss_ce": 0.0014519400428980589, + "loss_iou": 0.42578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 245230184, + "step": 1941 + }, + { + "epoch": 0.4981722567818893, + "grad_norm": 53.021358489990234, + "learning_rate": 5e-06, + "loss": 0.9471, + "num_input_tokens_seen": 245355484, + "step": 1942 + }, + { + "epoch": 0.4981722567818893, + "loss": 0.9286558032035828, + "loss_ce": 0.0004331194795668125, + "loss_iou": 0.44140625, + "loss_num": 0.009033203125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 245355484, + "step": 1942 + }, + { + "epoch": 0.4984287821458347, + "grad_norm": 63.41948318481445, + "learning_rate": 5e-06, + "loss": 0.9912, + "num_input_tokens_seen": 245482692, + "step": 1943 + }, + { + "epoch": 0.4984287821458347, + "loss": 1.0374295711517334, + "loss_ce": 0.00032016431214287877, + "loss_iou": 0.5, + "loss_num": 0.007476806640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 245482692, + "step": 1943 + }, + { + "epoch": 0.49868530750978, + "grad_norm": 56.867366790771484, + "learning_rate": 5e-06, + "loss": 0.9644, + "num_input_tokens_seen": 245609504, + "step": 1944 + }, + { + "epoch": 0.49868530750978, + "loss": 1.0545978546142578, + "loss_ce": 0.00039873310015536845, + "loss_iou": 0.48046875, + "loss_num": 0.0181884765625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 245609504, + "step": 1944 + }, + { + "epoch": 0.4989418328737254, + "grad_norm": 51.33348846435547, + "learning_rate": 5e-06, + "loss": 1.1593, + "num_input_tokens_seen": 245735616, + "step": 1945 + }, + { + "epoch": 0.4989418328737254, + "loss": 1.1304134130477905, + "loss_ce": 0.003948523662984371, + "loss_iou": 0.515625, + "loss_num": 0.01953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 245735616, + "step": 1945 + }, + { + "epoch": 0.4991983582376707, + "grad_norm": 69.12086486816406, + "learning_rate": 5e-06, + "loss": 1.0694, + "num_input_tokens_seen": 245861240, + "step": 1946 + }, + { + "epoch": 0.4991983582376707, + "loss": 0.9726510047912598, + "loss_ce": 0.00048304349184036255, + "loss_iou": 0.4609375, + "loss_num": 0.01007080078125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 245861240, + "step": 1946 + }, + { + "epoch": 0.4994548836016161, + "grad_norm": 50.16570281982422, + "learning_rate": 5e-06, + "loss": 1.111, + "num_input_tokens_seen": 245987520, + "step": 1947 + }, + { + "epoch": 0.4994548836016161, + "loss": 1.1666797399520874, + "loss_ce": 0.0011523929424583912, + "loss_iou": 0.5390625, + "loss_num": 0.0179443359375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 245987520, + "step": 1947 + }, + { + "epoch": 0.4997114089655615, + "grad_norm": 28.66356658935547, + "learning_rate": 5e-06, + "loss": 0.9775, + "num_input_tokens_seen": 246113292, + "step": 1948 + }, + { + "epoch": 0.4997114089655615, + "loss": 0.9311819672584534, + "loss_ce": 0.0014944535214453936, + "loss_iou": 0.41796875, + "loss_num": 0.0185546875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 246113292, + "step": 1948 + }, + { + "epoch": 0.4999679343295068, + "grad_norm": 48.403282165527344, + "learning_rate": 5e-06, + "loss": 0.982, + "num_input_tokens_seen": 246239848, + "step": 1949 + }, + { + "epoch": 0.4999679343295068, + "loss": 1.0772569179534912, + "loss_ce": 0.0032824124209582806, + "loss_iou": 0.484375, + "loss_num": 0.0213623046875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 246239848, + "step": 1949 + }, + { + "epoch": 0.5002244596934522, + "grad_norm": 49.93346405029297, + "learning_rate": 5e-06, + "loss": 1.0219, + "num_input_tokens_seen": 246366468, + "step": 1950 + }, + { + "epoch": 0.5002244596934522, + "loss": 1.0052132606506348, + "loss_ce": 0.0003304735291749239, + "loss_iou": 0.46875, + "loss_num": 0.0133056640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 246366468, + "step": 1950 + }, + { + "epoch": 0.5004809850573976, + "grad_norm": 42.63022994995117, + "learning_rate": 5e-06, + "loss": 1.0861, + "num_input_tokens_seen": 246492332, + "step": 1951 + }, + { + "epoch": 0.5004809850573976, + "loss": 1.0916639566421509, + "loss_ce": 0.003773325588554144, + "loss_iou": 0.5, + "loss_num": 0.017333984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 246492332, + "step": 1951 + }, + { + "epoch": 0.500737510421343, + "grad_norm": 61.87216567993164, + "learning_rate": 5e-06, + "loss": 1.0761, + "num_input_tokens_seen": 246619012, + "step": 1952 + }, + { + "epoch": 0.500737510421343, + "loss": 1.1181005239486694, + "loss_ce": 0.001401299610733986, + "loss_iou": 0.515625, + "loss_num": 0.0167236328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 246619012, + "step": 1952 + }, + { + "epoch": 0.5009940357852882, + "grad_norm": 52.38215637207031, + "learning_rate": 5e-06, + "loss": 1.0396, + "num_input_tokens_seen": 246745432, + "step": 1953 + }, + { + "epoch": 0.5009940357852882, + "loss": 0.975311279296875, + "loss_ce": 0.0011901702964678407, + "loss_iou": 0.453125, + "loss_num": 0.01348876953125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 246745432, + "step": 1953 + }, + { + "epoch": 0.5012505611492336, + "grad_norm": 40.68037033081055, + "learning_rate": 5e-06, + "loss": 1.0924, + "num_input_tokens_seen": 246869732, + "step": 1954 + }, + { + "epoch": 0.5012505611492336, + "loss": 1.013520359992981, + "loss_ce": 0.00033668422838672996, + "loss_iou": 0.466796875, + "loss_num": 0.0157470703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 246869732, + "step": 1954 + }, + { + "epoch": 0.501507086513179, + "grad_norm": 48.43160629272461, + "learning_rate": 5e-06, + "loss": 1.0979, + "num_input_tokens_seen": 246995280, + "step": 1955 + }, + { + "epoch": 0.501507086513179, + "loss": 1.053154706954956, + "loss_ce": 0.0009085642523132265, + "loss_iou": 0.490234375, + "loss_num": 0.01434326171875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 246995280, + "step": 1955 + }, + { + "epoch": 0.5017636118771244, + "grad_norm": 39.772708892822266, + "learning_rate": 5e-06, + "loss": 1.1107, + "num_input_tokens_seen": 247121336, + "step": 1956 + }, + { + "epoch": 0.5017636118771244, + "loss": 1.1274282932281494, + "loss_ce": 0.0009635047172196209, + "loss_iou": 0.5, + "loss_num": 0.0247802734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 247121336, + "step": 1956 + }, + { + "epoch": 0.5020201372410698, + "grad_norm": 52.89803695678711, + "learning_rate": 5e-06, + "loss": 1.1745, + "num_input_tokens_seen": 247247912, + "step": 1957 + }, + { + "epoch": 0.5020201372410698, + "loss": 1.1431041955947876, + "loss_ce": 0.0005260383477434516, + "loss_iou": 0.51953125, + "loss_num": 0.02001953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 247247912, + "step": 1957 + }, + { + "epoch": 0.502276662605015, + "grad_norm": 88.6466293334961, + "learning_rate": 5e-06, + "loss": 1.1005, + "num_input_tokens_seen": 247374980, + "step": 1958 + }, + { + "epoch": 0.502276662605015, + "loss": 1.0535577535629272, + "loss_ce": 0.0017999352421611547, + "loss_iou": 0.47265625, + "loss_num": 0.02099609375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 247374980, + "step": 1958 + }, + { + "epoch": 0.5025331879689604, + "grad_norm": 46.7523078918457, + "learning_rate": 5e-06, + "loss": 1.2314, + "num_input_tokens_seen": 247500752, + "step": 1959 + }, + { + "epoch": 0.5025331879689604, + "loss": 1.103193759918213, + "loss_ce": 0.0030960242729634047, + "loss_iou": 0.5, + "loss_num": 0.0198974609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 247500752, + "step": 1959 + }, + { + "epoch": 0.5027897133329058, + "grad_norm": 31.266008377075195, + "learning_rate": 5e-06, + "loss": 1.0209, + "num_input_tokens_seen": 247628084, + "step": 1960 + }, + { + "epoch": 0.5027897133329058, + "loss": 0.9163991808891296, + "loss_ce": 0.0006276534404605627, + "loss_iou": 0.431640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 247628084, + "step": 1960 + }, + { + "epoch": 0.5030462386968512, + "grad_norm": 43.27685546875, + "learning_rate": 5e-06, + "loss": 1.025, + "num_input_tokens_seen": 247754168, + "step": 1961 + }, + { + "epoch": 0.5030462386968512, + "loss": 0.9553647041320801, + "loss_ce": 0.0012631658464670181, + "loss_iou": 0.421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 247754168, + "step": 1961 + }, + { + "epoch": 0.5033027640607965, + "grad_norm": 53.06964874267578, + "learning_rate": 5e-06, + "loss": 0.9973, + "num_input_tokens_seen": 247880588, + "step": 1962 + }, + { + "epoch": 0.5033027640607965, + "loss": 1.1187806129455566, + "loss_ce": 0.0015931295929476619, + "loss_iou": 0.51953125, + "loss_num": 0.01507568359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 247880588, + "step": 1962 + }, + { + "epoch": 0.5035592894247418, + "grad_norm": 80.0895767211914, + "learning_rate": 5e-06, + "loss": 1.0593, + "num_input_tokens_seen": 248006464, + "step": 1963 + }, + { + "epoch": 0.5035592894247418, + "loss": 1.1478772163391113, + "loss_ce": 0.00041630020132288337, + "loss_iou": 0.5390625, + "loss_num": 0.014404296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 248006464, + "step": 1963 + }, + { + "epoch": 0.5038158147886872, + "grad_norm": 46.15283203125, + "learning_rate": 5e-06, + "loss": 1.156, + "num_input_tokens_seen": 248132392, + "step": 1964 + }, + { + "epoch": 0.5038158147886872, + "loss": 1.1452829837799072, + "loss_ce": 0.0002634258707985282, + "loss_iou": 0.53515625, + "loss_num": 0.01434326171875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 248132392, + "step": 1964 + }, + { + "epoch": 0.5040723401526326, + "grad_norm": 26.691389083862305, + "learning_rate": 5e-06, + "loss": 0.9927, + "num_input_tokens_seen": 248259488, + "step": 1965 + }, + { + "epoch": 0.5040723401526326, + "loss": 0.9330199360847473, + "loss_ce": 0.0006468782667070627, + "loss_iou": 0.43359375, + "loss_num": 0.0128173828125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 248259488, + "step": 1965 + }, + { + "epoch": 0.504328865516578, + "grad_norm": 30.740751266479492, + "learning_rate": 5e-06, + "loss": 0.8862, + "num_input_tokens_seen": 248386300, + "step": 1966 + }, + { + "epoch": 0.504328865516578, + "loss": 0.757299542427063, + "loss_ce": 0.0004636160738300532, + "loss_iou": 0.36328125, + "loss_num": 0.006103515625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 248386300, + "step": 1966 + }, + { + "epoch": 0.5045853908805233, + "grad_norm": 31.324859619140625, + "learning_rate": 5e-06, + "loss": 0.985, + "num_input_tokens_seen": 248511552, + "step": 1967 + }, + { + "epoch": 0.5045853908805233, + "loss": 1.0081883668899536, + "loss_ce": 0.0033055779058486223, + "loss_iou": 0.4296875, + "loss_num": 0.0289306640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 248511552, + "step": 1967 + }, + { + "epoch": 0.5048419162444687, + "grad_norm": 34.80128479003906, + "learning_rate": 5e-06, + "loss": 1.0956, + "num_input_tokens_seen": 248636796, + "step": 1968 + }, + { + "epoch": 0.5048419162444687, + "loss": 1.0826632976531982, + "loss_ce": 0.001608680235221982, + "loss_iou": 0.50390625, + "loss_num": 0.0142822265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 248636796, + "step": 1968 + }, + { + "epoch": 0.505098441608414, + "grad_norm": 60.90861511230469, + "learning_rate": 5e-06, + "loss": 1.0269, + "num_input_tokens_seen": 248763152, + "step": 1969 + }, + { + "epoch": 0.505098441608414, + "loss": 1.2116063833236694, + "loss_ce": 0.0006688160356134176, + "loss_iou": 0.5625, + "loss_num": 0.0181884765625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 248763152, + "step": 1969 + }, + { + "epoch": 0.5053549669723594, + "grad_norm": 54.52490997314453, + "learning_rate": 5e-06, + "loss": 1.0443, + "num_input_tokens_seen": 248889360, + "step": 1970 + }, + { + "epoch": 0.5053549669723594, + "loss": 1.0789786577224731, + "loss_ce": 0.0008537425310350955, + "loss_iou": 0.5078125, + "loss_num": 0.01177978515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 248889360, + "step": 1970 + }, + { + "epoch": 0.5056114923363048, + "grad_norm": 44.14472579956055, + "learning_rate": 5e-06, + "loss": 1.119, + "num_input_tokens_seen": 249016180, + "step": 1971 + }, + { + "epoch": 0.5056114923363048, + "loss": 1.0135817527770996, + "loss_ce": 0.005280913319438696, + "loss_iou": 0.4609375, + "loss_num": 0.0174560546875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 249016180, + "step": 1971 + }, + { + "epoch": 0.5058680177002501, + "grad_norm": 43.29777145385742, + "learning_rate": 5e-06, + "loss": 1.0543, + "num_input_tokens_seen": 249141788, + "step": 1972 + }, + { + "epoch": 0.5058680177002501, + "loss": 1.0376993417739868, + "loss_ce": 0.001078238827176392, + "loss_iou": 0.478515625, + "loss_num": 0.01611328125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 249141788, + "step": 1972 + }, + { + "epoch": 0.5061245430641955, + "grad_norm": 60.015464782714844, + "learning_rate": 5e-06, + "loss": 1.0604, + "num_input_tokens_seen": 249267964, + "step": 1973 + }, + { + "epoch": 0.5061245430641955, + "loss": 0.9710097312927246, + "loss_ce": 0.00030658955802209675, + "loss_iou": 0.443359375, + "loss_num": 0.016357421875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 249267964, + "step": 1973 + }, + { + "epoch": 0.5063810684281408, + "grad_norm": 51.441287994384766, + "learning_rate": 5e-06, + "loss": 1.3333, + "num_input_tokens_seen": 249393732, + "step": 1974 + }, + { + "epoch": 0.5063810684281408, + "loss": 1.1207516193389893, + "loss_ce": 0.0006344185094349086, + "loss_iou": 0.52734375, + "loss_num": 0.01361083984375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 249393732, + "step": 1974 + }, + { + "epoch": 0.5066375937920862, + "grad_norm": 35.239501953125, + "learning_rate": 5e-06, + "loss": 1.0083, + "num_input_tokens_seen": 249519996, + "step": 1975 + }, + { + "epoch": 0.5066375937920862, + "loss": 1.1433062553405762, + "loss_ce": 0.0012164422078058124, + "loss_iou": 0.52734375, + "loss_num": 0.0177001953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 249519996, + "step": 1975 + }, + { + "epoch": 0.5068941191560316, + "grad_norm": 49.210357666015625, + "learning_rate": 5e-06, + "loss": 1.045, + "num_input_tokens_seen": 249645888, + "step": 1976 + }, + { + "epoch": 0.5068941191560316, + "loss": 1.0820924043655396, + "loss_ce": 0.0010376889258623123, + "loss_iou": 0.484375, + "loss_num": 0.0225830078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 249645888, + "step": 1976 + }, + { + "epoch": 0.5071506445199769, + "grad_norm": 47.27915573120117, + "learning_rate": 5e-06, + "loss": 0.8378, + "num_input_tokens_seen": 249772304, + "step": 1977 + }, + { + "epoch": 0.5071506445199769, + "loss": 0.8184223771095276, + "loss_ce": 0.0005512924981303513, + "loss_iou": 0.392578125, + "loss_num": 0.006988525390625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 249772304, + "step": 1977 + }, + { + "epoch": 0.5074071698839223, + "grad_norm": 58.044532775878906, + "learning_rate": 5e-06, + "loss": 1.0967, + "num_input_tokens_seen": 249898388, + "step": 1978 + }, + { + "epoch": 0.5074071698839223, + "loss": 1.058220624923706, + "loss_ce": 0.0010916382307186723, + "loss_iou": 0.474609375, + "loss_num": 0.021728515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 249898388, + "step": 1978 + }, + { + "epoch": 0.5076636952478676, + "grad_norm": 77.34203338623047, + "learning_rate": 5e-06, + "loss": 1.0342, + "num_input_tokens_seen": 250024756, + "step": 1979 + }, + { + "epoch": 0.5076636952478676, + "loss": 1.0256783962249756, + "loss_ce": 0.005170594435185194, + "loss_iou": 0.4765625, + "loss_num": 0.013427734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 250024756, + "step": 1979 + }, + { + "epoch": 0.507920220611813, + "grad_norm": 70.75835418701172, + "learning_rate": 5e-06, + "loss": 1.0178, + "num_input_tokens_seen": 250149928, + "step": 1980 + }, + { + "epoch": 0.507920220611813, + "loss": 1.0374345779418945, + "loss_ce": 0.0017900425009429455, + "loss_iou": 0.478515625, + "loss_num": 0.01544189453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 250149928, + "step": 1980 + }, + { + "epoch": 0.5081767459757583, + "grad_norm": 37.257301330566406, + "learning_rate": 5e-06, + "loss": 0.9649, + "num_input_tokens_seen": 250276676, + "step": 1981 + }, + { + "epoch": 0.5081767459757583, + "loss": 0.9519698619842529, + "loss_ce": 0.0007979950751177967, + "loss_iou": 0.447265625, + "loss_num": 0.01123046875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 250276676, + "step": 1981 + }, + { + "epoch": 0.5084332713397037, + "grad_norm": 56.55401611328125, + "learning_rate": 5e-06, + "loss": 1.028, + "num_input_tokens_seen": 250403840, + "step": 1982 + }, + { + "epoch": 0.5084332713397037, + "loss": 1.0567615032196045, + "loss_ce": 0.0010973232565447688, + "loss_iou": 0.484375, + "loss_num": 0.0172119140625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 250403840, + "step": 1982 + }, + { + "epoch": 0.5086897967036491, + "grad_norm": 60.8635139465332, + "learning_rate": 5e-06, + "loss": 0.9688, + "num_input_tokens_seen": 250531108, + "step": 1983 + }, + { + "epoch": 0.5086897967036491, + "loss": 0.7423478364944458, + "loss_ce": 0.0006485896301455796, + "loss_iou": 0.35546875, + "loss_num": 0.006561279296875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 250531108, + "step": 1983 + }, + { + "epoch": 0.5089463220675944, + "grad_norm": 75.34029388427734, + "learning_rate": 5e-06, + "loss": 1.1086, + "num_input_tokens_seen": 250657688, + "step": 1984 + }, + { + "epoch": 0.5089463220675944, + "loss": 0.9669362306594849, + "loss_ce": 0.0020924555137753487, + "loss_iou": 0.451171875, + "loss_num": 0.01226806640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 250657688, + "step": 1984 + }, + { + "epoch": 0.5092028474315398, + "grad_norm": 52.740699768066406, + "learning_rate": 5e-06, + "loss": 1.2747, + "num_input_tokens_seen": 250784564, + "step": 1985 + }, + { + "epoch": 0.5092028474315398, + "loss": 1.2550649642944336, + "loss_ce": 0.0021352036856114864, + "loss_iou": 0.58203125, + "loss_num": 0.0181884765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 250784564, + "step": 1985 + }, + { + "epoch": 0.5094593727954851, + "grad_norm": 75.71601867675781, + "learning_rate": 5e-06, + "loss": 1.049, + "num_input_tokens_seen": 250910336, + "step": 1986 + }, + { + "epoch": 0.5094593727954851, + "loss": 1.1344959735870361, + "loss_ce": 0.0011952494969591498, + "loss_iou": 0.51171875, + "loss_num": 0.0224609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 250910336, + "step": 1986 + }, + { + "epoch": 0.5097158981594305, + "grad_norm": 48.29861068725586, + "learning_rate": 5e-06, + "loss": 1.0019, + "num_input_tokens_seen": 251036144, + "step": 1987 + }, + { + "epoch": 0.5097158981594305, + "loss": 1.1514601707458496, + "loss_ce": 0.0015577529557049274, + "loss_iou": 0.515625, + "loss_num": 0.0240478515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 251036144, + "step": 1987 + }, + { + "epoch": 0.5099724235233759, + "grad_norm": 61.368141174316406, + "learning_rate": 5e-06, + "loss": 1.0238, + "num_input_tokens_seen": 251161980, + "step": 1988 + }, + { + "epoch": 0.5099724235233759, + "loss": 0.9719153642654419, + "loss_ce": 0.00023571330530103296, + "loss_iou": 0.458984375, + "loss_num": 0.01043701171875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 251161980, + "step": 1988 + }, + { + "epoch": 0.5102289488873213, + "grad_norm": 61.796295166015625, + "learning_rate": 5e-06, + "loss": 1.0777, + "num_input_tokens_seen": 251288756, + "step": 1989 + }, + { + "epoch": 0.5102289488873213, + "loss": 1.0182539224624634, + "loss_ce": 0.002628915011882782, + "loss_iou": 0.46484375, + "loss_num": 0.01708984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 251288756, + "step": 1989 + }, + { + "epoch": 0.5104854742512666, + "grad_norm": 65.69779968261719, + "learning_rate": 5e-06, + "loss": 0.9743, + "num_input_tokens_seen": 251414672, + "step": 1990 + }, + { + "epoch": 0.5104854742512666, + "loss": 1.002121925354004, + "loss_ce": 0.0011453827610239387, + "loss_iou": 0.482421875, + "loss_num": 0.00714111328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 251414672, + "step": 1990 + }, + { + "epoch": 0.5107419996152119, + "grad_norm": 42.62310028076172, + "learning_rate": 5e-06, + "loss": 1.0512, + "num_input_tokens_seen": 251540608, + "step": 1991 + }, + { + "epoch": 0.5107419996152119, + "loss": 1.0379486083984375, + "loss_ce": 0.0008391728042624891, + "loss_iou": 0.486328125, + "loss_num": 0.01263427734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 251540608, + "step": 1991 + }, + { + "epoch": 0.5109985249791573, + "grad_norm": 43.345741271972656, + "learning_rate": 5e-06, + "loss": 1.0367, + "num_input_tokens_seen": 251666524, + "step": 1992 + }, + { + "epoch": 0.5109985249791573, + "loss": 1.0926636457443237, + "loss_ce": 0.002819840796291828, + "loss_iou": 0.470703125, + "loss_num": 0.0294189453125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 251666524, + "step": 1992 + }, + { + "epoch": 0.5112550503431027, + "grad_norm": 34.22275924682617, + "learning_rate": 5e-06, + "loss": 0.9231, + "num_input_tokens_seen": 251794112, + "step": 1993 + }, + { + "epoch": 0.5112550503431027, + "loss": 0.9508673548698425, + "loss_ce": 0.002381016733124852, + "loss_iou": 0.431640625, + "loss_num": 0.0172119140625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 251794112, + "step": 1993 + }, + { + "epoch": 0.5115115757070481, + "grad_norm": 38.710960388183594, + "learning_rate": 5e-06, + "loss": 0.9232, + "num_input_tokens_seen": 251920564, + "step": 1994 + }, + { + "epoch": 0.5115115757070481, + "loss": 1.0167114734649658, + "loss_ce": 0.0005981270223855972, + "loss_iou": 0.44140625, + "loss_num": 0.0267333984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 251920564, + "step": 1994 + }, + { + "epoch": 0.5117681010709934, + "grad_norm": 45.97953796386719, + "learning_rate": 5e-06, + "loss": 1.0083, + "num_input_tokens_seen": 252046848, + "step": 1995 + }, + { + "epoch": 0.5117681010709934, + "loss": 1.1631770133972168, + "loss_ce": 0.00057939940597862, + "loss_iou": 0.54296875, + "loss_num": 0.01519775390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 252046848, + "step": 1995 + }, + { + "epoch": 0.5120246264349387, + "grad_norm": 41.141563415527344, + "learning_rate": 5e-06, + "loss": 0.9372, + "num_input_tokens_seen": 252173700, + "step": 1996 + }, + { + "epoch": 0.5120246264349387, + "loss": 0.9254911541938782, + "loss_ce": 0.004104464314877987, + "loss_iou": 0.423828125, + "loss_num": 0.0147705078125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 252173700, + "step": 1996 + }, + { + "epoch": 0.5122811517988841, + "grad_norm": 49.35930633544922, + "learning_rate": 5e-06, + "loss": 0.9692, + "num_input_tokens_seen": 252299332, + "step": 1997 + }, + { + "epoch": 0.5122811517988841, + "loss": 1.0402331352233887, + "loss_ce": 0.008006537333130836, + "loss_iou": 0.47265625, + "loss_num": 0.016845703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 252299332, + "step": 1997 + }, + { + "epoch": 0.5125376771628295, + "grad_norm": 48.03907012939453, + "learning_rate": 5e-06, + "loss": 1.1805, + "num_input_tokens_seen": 252425552, + "step": 1998 + }, + { + "epoch": 0.5125376771628295, + "loss": 1.225752830505371, + "loss_ce": 0.002608337439596653, + "loss_iou": 0.546875, + "loss_num": 0.0255126953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 252425552, + "step": 1998 + }, + { + "epoch": 0.5127942025267749, + "grad_norm": 55.809364318847656, + "learning_rate": 5e-06, + "loss": 1.0101, + "num_input_tokens_seen": 252552256, + "step": 1999 + }, + { + "epoch": 0.5127942025267749, + "loss": 1.0398286581039429, + "loss_ce": 0.00027791125467047095, + "loss_iou": 0.462890625, + "loss_num": 0.022705078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 252552256, + "step": 1999 + }, + { + "epoch": 0.5130507278907201, + "grad_norm": 49.3867073059082, + "learning_rate": 5e-06, + "loss": 1.1307, + "num_input_tokens_seen": 252679016, + "step": 2000 + }, + { + "epoch": 0.5130507278907201, + "eval_icons_CIoU": 0.16832323744893074, + "eval_icons_GIoU": 0.13109151925891638, + "eval_icons_IoU": 0.368585005402565, + "eval_icons_MAE_all": 0.0342010073363781, + "eval_icons_MAE_h": 0.05807036720216274, + "eval_icons_MAE_w": 0.05678812600672245, + "eval_icons_MAE_x_boxes": 0.056773215532302856, + "eval_icons_MAE_y_boxes": 0.052764393389225006, + "eval_icons_NUM_probability": 0.999769389629364, + "eval_icons_inside_bbox": 0.5815972238779068, + "eval_icons_loss": 1.8363127708435059, + "eval_icons_loss_ce": 5.781662912340835e-05, + "eval_icons_loss_iou": 0.8375244140625, + "eval_icons_loss_num": 0.03726005554199219, + "eval_icons_loss_xval": 1.862060546875, + "eval_icons_runtime": 48.034, + "eval_icons_samples_per_second": 1.041, + "eval_icons_steps_per_second": 0.042, + "num_input_tokens_seen": 252679016, + "step": 2000 + }, + { + "epoch": 0.5130507278907201, + "eval_screenspot_CIoU": 0.1256368706623713, + "eval_screenspot_GIoU": 0.11456992849707603, + "eval_screenspot_IoU": 0.29288529853026074, + "eval_screenspot_MAE_all": 0.0788506269454956, + "eval_screenspot_MAE_h": 0.05711434533198675, + "eval_screenspot_MAE_w": 0.13206478456656137, + "eval_screenspot_MAE_x_boxes": 0.11205907414356868, + "eval_screenspot_MAE_y_boxes": 0.05093859260280927, + "eval_screenspot_NUM_probability": 0.9998437364896139, + "eval_screenspot_inside_bbox": 0.6358333428700765, + "eval_screenspot_loss": 2.2124693393707275, + "eval_screenspot_loss_ce": 0.0015019784914329648, + "eval_screenspot_loss_iou": 0.9165852864583334, + "eval_screenspot_loss_num": 0.085205078125, + "eval_screenspot_loss_xval": 2.259765625, + "eval_screenspot_runtime": 109.1459, + "eval_screenspot_samples_per_second": 0.815, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 252679016, + "step": 2000 + }, + { + "epoch": 0.5130507278907201, + "loss": 2.171994686126709, + "loss_ce": 0.001096243504434824, + "loss_iou": 0.89453125, + "loss_num": 0.076171875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 252679016, + "step": 2000 + }, + { + "epoch": 0.5133072532546655, + "grad_norm": 53.315330505371094, + "learning_rate": 5e-06, + "loss": 0.896, + "num_input_tokens_seen": 252805816, + "step": 2001 + }, + { + "epoch": 0.5133072532546655, + "loss": 1.0649224519729614, + "loss_ce": 0.0019342233426868916, + "loss_iou": 0.49609375, + "loss_num": 0.0137939453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 252805816, + "step": 2001 + }, + { + "epoch": 0.5135637786186109, + "grad_norm": 51.42131423950195, + "learning_rate": 5e-06, + "loss": 1.007, + "num_input_tokens_seen": 252931044, + "step": 2002 + }, + { + "epoch": 0.5135637786186109, + "loss": 1.0445877313613892, + "loss_ce": 0.0016189826419577003, + "loss_iou": 0.474609375, + "loss_num": 0.0185546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 252931044, + "step": 2002 + }, + { + "epoch": 0.5138203039825563, + "grad_norm": 66.39872741699219, + "learning_rate": 5e-06, + "loss": 0.9869, + "num_input_tokens_seen": 253058096, + "step": 2003 + }, + { + "epoch": 0.5138203039825563, + "loss": 1.1486905813217163, + "loss_ce": 0.004647629801183939, + "loss_iou": 0.5078125, + "loss_num": 0.0262451171875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 253058096, + "step": 2003 + }, + { + "epoch": 0.5140768293465017, + "grad_norm": 70.35369873046875, + "learning_rate": 5e-06, + "loss": 1.0672, + "num_input_tokens_seen": 253184688, + "step": 2004 + }, + { + "epoch": 0.5140768293465017, + "loss": 1.1281483173370361, + "loss_ce": 0.0016834231792017817, + "loss_iou": 0.51171875, + "loss_num": 0.0213623046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 253184688, + "step": 2004 + }, + { + "epoch": 0.5143333547104469, + "grad_norm": 44.32569885253906, + "learning_rate": 5e-06, + "loss": 0.9608, + "num_input_tokens_seen": 253310832, + "step": 2005 + }, + { + "epoch": 0.5143333547104469, + "loss": 0.796124279499054, + "loss_ce": 0.0012023926246911287, + "loss_iou": 0.369140625, + "loss_num": 0.01129150390625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 253310832, + "step": 2005 + }, + { + "epoch": 0.5145898800743923, + "grad_norm": 50.17256164550781, + "learning_rate": 5e-06, + "loss": 1.101, + "num_input_tokens_seen": 253438044, + "step": 2006 + }, + { + "epoch": 0.5145898800743923, + "loss": 0.9657532572746277, + "loss_ce": 0.00042118330020457506, + "loss_iou": 0.453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 253438044, + "step": 2006 + }, + { + "epoch": 0.5148464054383377, + "grad_norm": 79.54283142089844, + "learning_rate": 5e-06, + "loss": 1.0793, + "num_input_tokens_seen": 253564572, + "step": 2007 + }, + { + "epoch": 0.5148464054383377, + "loss": 1.0568351745605469, + "loss_ce": 0.0001945913245435804, + "loss_iou": 0.48828125, + "loss_num": 0.0162353515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 253564572, + "step": 2007 + }, + { + "epoch": 0.5151029308022831, + "grad_norm": 56.104915618896484, + "learning_rate": 5e-06, + "loss": 1.0671, + "num_input_tokens_seen": 253691240, + "step": 2008 + }, + { + "epoch": 0.5151029308022831, + "loss": 1.1161574125289917, + "loss_ce": 0.0018995684804394841, + "loss_iou": 0.51171875, + "loss_num": 0.017822265625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 253691240, + "step": 2008 + }, + { + "epoch": 0.5153594561662285, + "grad_norm": 35.95633316040039, + "learning_rate": 5e-06, + "loss": 1.0097, + "num_input_tokens_seen": 253817952, + "step": 2009 + }, + { + "epoch": 0.5153594561662285, + "loss": 0.9539680480957031, + "loss_ce": 0.0008430131711065769, + "loss_iou": 0.443359375, + "loss_num": 0.012939453125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 253817952, + "step": 2009 + }, + { + "epoch": 0.5156159815301739, + "grad_norm": 47.13933181762695, + "learning_rate": 5e-06, + "loss": 1.1237, + "num_input_tokens_seen": 253944624, + "step": 2010 + }, + { + "epoch": 0.5156159815301739, + "loss": 1.221038579940796, + "loss_ce": 0.0008237491128966212, + "loss_iou": 0.56640625, + "loss_num": 0.016845703125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 253944624, + "step": 2010 + }, + { + "epoch": 0.5158725068941191, + "grad_norm": 93.35911560058594, + "learning_rate": 5e-06, + "loss": 1.0442, + "num_input_tokens_seen": 254071168, + "step": 2011 + }, + { + "epoch": 0.5158725068941191, + "loss": 1.2433799505233765, + "loss_ce": 0.000704150297679007, + "loss_iou": 0.5625, + "loss_num": 0.0235595703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 254071168, + "step": 2011 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 52.84657669067383, + "learning_rate": 5e-06, + "loss": 1.2481, + "num_input_tokens_seen": 254197656, + "step": 2012 + }, + { + "epoch": 0.5161290322580645, + "loss": 1.2436964511871338, + "loss_ce": 0.001508961315266788, + "loss_iou": 0.56640625, + "loss_num": 0.0211181640625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 254197656, + "step": 2012 + }, + { + "epoch": 0.5163855576220099, + "grad_norm": 38.08906173706055, + "learning_rate": 5e-06, + "loss": 1.1064, + "num_input_tokens_seen": 254323976, + "step": 2013 + }, + { + "epoch": 0.5163855576220099, + "loss": 1.0199476480484009, + "loss_ce": 0.001393006299622357, + "loss_iou": 0.451171875, + "loss_num": 0.022705078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 254323976, + "step": 2013 + }, + { + "epoch": 0.5166420829859553, + "grad_norm": 47.89059829711914, + "learning_rate": 5e-06, + "loss": 0.847, + "num_input_tokens_seen": 254450192, + "step": 2014 + }, + { + "epoch": 0.5166420829859553, + "loss": 0.7565430402755737, + "loss_ce": 0.00019540336506906897, + "loss_iou": 0.361328125, + "loss_num": 0.0067138671875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 254450192, + "step": 2014 + }, + { + "epoch": 0.5168986083499006, + "grad_norm": 52.407894134521484, + "learning_rate": 5e-06, + "loss": 0.9314, + "num_input_tokens_seen": 254576696, + "step": 2015 + }, + { + "epoch": 0.5168986083499006, + "loss": 0.8725275993347168, + "loss_ce": 0.0004572817706502974, + "loss_iou": 0.40625, + "loss_num": 0.01202392578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 254576696, + "step": 2015 + }, + { + "epoch": 0.5171551337138459, + "grad_norm": 52.08160400390625, + "learning_rate": 5e-06, + "loss": 0.9975, + "num_input_tokens_seen": 254702608, + "step": 2016 + }, + { + "epoch": 0.5171551337138459, + "loss": 0.9522646069526672, + "loss_ce": 0.001092717400752008, + "loss_iou": 0.4375, + "loss_num": 0.01531982421875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 254702608, + "step": 2016 + }, + { + "epoch": 0.5174116590777913, + "grad_norm": 49.670387268066406, + "learning_rate": 5e-06, + "loss": 1.0737, + "num_input_tokens_seen": 254828244, + "step": 2017 + }, + { + "epoch": 0.5174116590777913, + "loss": 1.0801076889038086, + "loss_ce": 0.00027374300407245755, + "loss_iou": 0.486328125, + "loss_num": 0.0218505859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 254828244, + "step": 2017 + }, + { + "epoch": 0.5176681844417367, + "grad_norm": 51.07948303222656, + "learning_rate": 5e-06, + "loss": 0.981, + "num_input_tokens_seen": 254955456, + "step": 2018 + }, + { + "epoch": 0.5176681844417367, + "loss": 1.0525212287902832, + "loss_ce": 0.0007634421926923096, + "loss_iou": 0.46484375, + "loss_num": 0.0242919921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 254955456, + "step": 2018 + }, + { + "epoch": 0.5179247098056821, + "grad_norm": 48.830841064453125, + "learning_rate": 5e-06, + "loss": 1.0348, + "num_input_tokens_seen": 255080444, + "step": 2019 + }, + { + "epoch": 0.5179247098056821, + "loss": 1.0323619842529297, + "loss_ce": 0.00013537262566387653, + "loss_iou": 0.478515625, + "loss_num": 0.0146484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 255080444, + "step": 2019 + }, + { + "epoch": 0.5181812351696274, + "grad_norm": 49.204986572265625, + "learning_rate": 5e-06, + "loss": 0.9025, + "num_input_tokens_seen": 255206940, + "step": 2020 + }, + { + "epoch": 0.5181812351696274, + "loss": 0.8655695915222168, + "loss_ce": 0.0013117878697812557, + "loss_iou": 0.408203125, + "loss_num": 0.009521484375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 255206940, + "step": 2020 + }, + { + "epoch": 0.5184377605335727, + "grad_norm": 57.278892517089844, + "learning_rate": 5e-06, + "loss": 1.1594, + "num_input_tokens_seen": 255332792, + "step": 2021 + }, + { + "epoch": 0.5184377605335727, + "loss": 1.0450923442840576, + "loss_ce": 0.0016352500533685088, + "loss_iou": 0.466796875, + "loss_num": 0.022216796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 255332792, + "step": 2021 + }, + { + "epoch": 0.5186942858975181, + "grad_norm": 73.57088470458984, + "learning_rate": 5e-06, + "loss": 1.0729, + "num_input_tokens_seen": 255460044, + "step": 2022 + }, + { + "epoch": 0.5186942858975181, + "loss": 1.0575311183929443, + "loss_ce": 0.0008905873401090503, + "loss_iou": 0.494140625, + "loss_num": 0.01385498046875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 255460044, + "step": 2022 + }, + { + "epoch": 0.5189508112614635, + "grad_norm": 46.59572219848633, + "learning_rate": 5e-06, + "loss": 1.1011, + "num_input_tokens_seen": 255585808, + "step": 2023 + }, + { + "epoch": 0.5189508112614635, + "loss": 1.0083049535751343, + "loss_ce": 0.0009807954775169492, + "loss_iou": 0.474609375, + "loss_num": 0.01141357421875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 255585808, + "step": 2023 + }, + { + "epoch": 0.5192073366254089, + "grad_norm": 48.697776794433594, + "learning_rate": 5e-06, + "loss": 0.991, + "num_input_tokens_seen": 255711720, + "step": 2024 + }, + { + "epoch": 0.5192073366254089, + "loss": 1.0130096673965454, + "loss_ce": 0.0022674871142953634, + "loss_iou": 0.470703125, + "loss_num": 0.01385498046875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 255711720, + "step": 2024 + }, + { + "epoch": 0.5194638619893542, + "grad_norm": 55.978389739990234, + "learning_rate": 5e-06, + "loss": 0.9849, + "num_input_tokens_seen": 255837452, + "step": 2025 + }, + { + "epoch": 0.5194638619893542, + "loss": 0.912277102470398, + "loss_ce": 0.00041184999281540513, + "loss_iou": 0.423828125, + "loss_num": 0.01287841796875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 255837452, + "step": 2025 + }, + { + "epoch": 0.5197203873532995, + "grad_norm": 141.40829467773438, + "learning_rate": 5e-06, + "loss": 1.0816, + "num_input_tokens_seen": 255965056, + "step": 2026 + }, + { + "epoch": 0.5197203873532995, + "loss": 1.2914857864379883, + "loss_ce": 0.0029116380028426647, + "loss_iou": 0.57421875, + "loss_num": 0.028564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 255965056, + "step": 2026 + }, + { + "epoch": 0.5199769127172449, + "grad_norm": 59.3145637512207, + "learning_rate": 5e-06, + "loss": 1.048, + "num_input_tokens_seen": 256091300, + "step": 2027 + }, + { + "epoch": 0.5199769127172449, + "loss": 1.1682484149932861, + "loss_ce": 0.0007678656256757677, + "loss_iou": 0.5390625, + "loss_num": 0.018798828125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 256091300, + "step": 2027 + }, + { + "epoch": 0.5202334380811903, + "grad_norm": 57.07319641113281, + "learning_rate": 5e-06, + "loss": 1.1075, + "num_input_tokens_seen": 256218600, + "step": 2028 + }, + { + "epoch": 0.5202334380811903, + "loss": 1.1069482564926147, + "loss_ce": 0.0014795939205214381, + "loss_iou": 0.51171875, + "loss_num": 0.0162353515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 256218600, + "step": 2028 + }, + { + "epoch": 0.5204899634451357, + "grad_norm": 51.766685485839844, + "learning_rate": 5e-06, + "loss": 1.0117, + "num_input_tokens_seen": 256344944, + "step": 2029 + }, + { + "epoch": 0.5204899634451357, + "loss": 0.8272620439529419, + "loss_ce": 0.0013343081809580326, + "loss_iou": 0.392578125, + "loss_num": 0.008056640625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 256344944, + "step": 2029 + }, + { + "epoch": 0.520746488809081, + "grad_norm": 85.35752868652344, + "learning_rate": 5e-06, + "loss": 1.1202, + "num_input_tokens_seen": 256471548, + "step": 2030 + }, + { + "epoch": 0.520746488809081, + "loss": 1.0634057521820068, + "loss_ce": 0.0009058131254278123, + "loss_iou": 0.478515625, + "loss_num": 0.0208740234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 256471548, + "step": 2030 + }, + { + "epoch": 0.5210030141730264, + "grad_norm": 73.8112564086914, + "learning_rate": 5e-06, + "loss": 0.9844, + "num_input_tokens_seen": 256598072, + "step": 2031 + }, + { + "epoch": 0.5210030141730264, + "loss": 0.8499078750610352, + "loss_ce": 0.00029845177778042853, + "loss_iou": 0.40625, + "loss_num": 0.007537841796875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 256598072, + "step": 2031 + }, + { + "epoch": 0.5212595395369717, + "grad_norm": 52.91657257080078, + "learning_rate": 5e-06, + "loss": 1.0901, + "num_input_tokens_seen": 256724476, + "step": 2032 + }, + { + "epoch": 0.5212595395369717, + "loss": 0.9766416549682617, + "loss_ce": 0.0005674446583725512, + "loss_iou": 0.4609375, + "loss_num": 0.01129150390625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 256724476, + "step": 2032 + }, + { + "epoch": 0.5215160649009171, + "grad_norm": 48.08242416381836, + "learning_rate": 5e-06, + "loss": 1.0007, + "num_input_tokens_seen": 256851344, + "step": 2033 + }, + { + "epoch": 0.5215160649009171, + "loss": 0.8402217626571655, + "loss_ce": 0.0003780190891120583, + "loss_iou": 0.40625, + "loss_num": 0.00567626953125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 256851344, + "step": 2033 + }, + { + "epoch": 0.5217725902648624, + "grad_norm": 54.82283401489258, + "learning_rate": 5e-06, + "loss": 1.0981, + "num_input_tokens_seen": 256978552, + "step": 2034 + }, + { + "epoch": 0.5217725902648624, + "loss": 1.1385711431503296, + "loss_ce": 0.00234073493629694, + "loss_iou": 0.5234375, + "loss_num": 0.018310546875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 256978552, + "step": 2034 + }, + { + "epoch": 0.5220291156288078, + "grad_norm": 59.975669860839844, + "learning_rate": 5e-06, + "loss": 1.1197, + "num_input_tokens_seen": 257104932, + "step": 2035 + }, + { + "epoch": 0.5220291156288078, + "loss": 1.1193156242370605, + "loss_ce": 0.0016398427542299032, + "loss_iou": 0.51171875, + "loss_num": 0.0184326171875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 257104932, + "step": 2035 + }, + { + "epoch": 0.5222856409927532, + "grad_norm": 63.6566162109375, + "learning_rate": 5e-06, + "loss": 1.1048, + "num_input_tokens_seen": 257230780, + "step": 2036 + }, + { + "epoch": 0.5222856409927532, + "loss": 1.1401309967041016, + "loss_ce": 0.0014591026119887829, + "loss_iou": 0.51953125, + "loss_num": 0.0201416015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 257230780, + "step": 2036 + }, + { + "epoch": 0.5225421663566985, + "grad_norm": 47.8028678894043, + "learning_rate": 5e-06, + "loss": 1.0493, + "num_input_tokens_seen": 257357820, + "step": 2037 + }, + { + "epoch": 0.5225421663566985, + "loss": 0.9849605560302734, + "loss_ce": 9.72605193965137e-05, + "loss_iou": 0.470703125, + "loss_num": 0.00872802734375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 257357820, + "step": 2037 + }, + { + "epoch": 0.5227986917206439, + "grad_norm": 43.6710090637207, + "learning_rate": 5e-06, + "loss": 1.0734, + "num_input_tokens_seen": 257483148, + "step": 2038 + }, + { + "epoch": 0.5227986917206439, + "loss": 1.1903860569000244, + "loss_ce": 0.000444635224994272, + "loss_iou": 0.55078125, + "loss_num": 0.0181884765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 257483148, + "step": 2038 + }, + { + "epoch": 0.5230552170845892, + "grad_norm": 54.831512451171875, + "learning_rate": 5e-06, + "loss": 1.0236, + "num_input_tokens_seen": 257607924, + "step": 2039 + }, + { + "epoch": 0.5230552170845892, + "loss": 1.138108491897583, + "loss_ce": 0.0009014400420710444, + "loss_iou": 0.51171875, + "loss_num": 0.022705078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 257607924, + "step": 2039 + }, + { + "epoch": 0.5233117424485346, + "grad_norm": 57.807411193847656, + "learning_rate": 5e-06, + "loss": 1.0037, + "num_input_tokens_seen": 257734008, + "step": 2040 + }, + { + "epoch": 0.5233117424485346, + "loss": 1.1398019790649414, + "loss_ce": 0.004059840925037861, + "loss_iou": 0.5078125, + "loss_num": 0.023681640625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 257734008, + "step": 2040 + }, + { + "epoch": 0.52356826781248, + "grad_norm": 69.98892211914062, + "learning_rate": 5e-06, + "loss": 0.9693, + "num_input_tokens_seen": 257860340, + "step": 2041 + }, + { + "epoch": 0.52356826781248, + "loss": 0.9668854475021362, + "loss_ce": 8.855860505718738e-05, + "loss_iou": 0.447265625, + "loss_num": 0.0146484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 257860340, + "step": 2041 + }, + { + "epoch": 0.5238247931764253, + "grad_norm": 61.258827209472656, + "learning_rate": 5e-06, + "loss": 1.0401, + "num_input_tokens_seen": 257986784, + "step": 2042 + }, + { + "epoch": 0.5238247931764253, + "loss": 1.189571499824524, + "loss_ce": 0.0010949037969112396, + "loss_iou": 0.5625, + "loss_num": 0.0125732421875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 257986784, + "step": 2042 + }, + { + "epoch": 0.5240813185403707, + "grad_norm": 99.4783935546875, + "learning_rate": 5e-06, + "loss": 1.0921, + "num_input_tokens_seen": 258113020, + "step": 2043 + }, + { + "epoch": 0.5240813185403707, + "loss": 1.2599425315856934, + "loss_ce": 0.004083174280822277, + "loss_iou": 0.57421875, + "loss_num": 0.021728515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 258113020, + "step": 2043 + }, + { + "epoch": 0.524337843904316, + "grad_norm": 54.923439025878906, + "learning_rate": 5e-06, + "loss": 1.1706, + "num_input_tokens_seen": 258239404, + "step": 2044 + }, + { + "epoch": 0.524337843904316, + "loss": 1.2532235383987427, + "loss_ce": 0.000293885066639632, + "loss_iou": 0.56640625, + "loss_num": 0.0242919921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 258239404, + "step": 2044 + }, + { + "epoch": 0.5245943692682614, + "grad_norm": 28.975839614868164, + "learning_rate": 5e-06, + "loss": 1.0054, + "num_input_tokens_seen": 258365476, + "step": 2045 + }, + { + "epoch": 0.5245943692682614, + "loss": 1.03302001953125, + "loss_ce": 0.001281818374991417, + "loss_iou": 0.478515625, + "loss_num": 0.01495361328125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 258365476, + "step": 2045 + }, + { + "epoch": 0.5248508946322068, + "grad_norm": 31.99435806274414, + "learning_rate": 5e-06, + "loss": 1.1046, + "num_input_tokens_seen": 258491948, + "step": 2046 + }, + { + "epoch": 0.5248508946322068, + "loss": 1.1749693155288696, + "loss_ce": 0.007488864008337259, + "loss_iou": 0.5078125, + "loss_num": 0.02978515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 258491948, + "step": 2046 + }, + { + "epoch": 0.5251074199961521, + "grad_norm": 38.08057403564453, + "learning_rate": 5e-06, + "loss": 0.9719, + "num_input_tokens_seen": 258616240, + "step": 2047 + }, + { + "epoch": 0.5251074199961521, + "loss": 1.218906044960022, + "loss_ce": 0.00113257288467139, + "loss_iou": 0.5390625, + "loss_num": 0.028076171875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 258616240, + "step": 2047 + }, + { + "epoch": 0.5253639453600975, + "grad_norm": 56.168601989746094, + "learning_rate": 5e-06, + "loss": 0.9933, + "num_input_tokens_seen": 258742892, + "step": 2048 + }, + { + "epoch": 0.5253639453600975, + "loss": 0.9749884605407715, + "loss_ce": 0.000867391237989068, + "loss_iou": 0.44921875, + "loss_num": 0.01470947265625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 258742892, + "step": 2048 + }, + { + "epoch": 0.5256204707240428, + "grad_norm": 53.860172271728516, + "learning_rate": 5e-06, + "loss": 1.106, + "num_input_tokens_seen": 258868968, + "step": 2049 + }, + { + "epoch": 0.5256204707240428, + "loss": 0.9289553165435791, + "loss_ce": 0.008545160293579102, + "loss_iou": 0.435546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 258868968, + "step": 2049 + }, + { + "epoch": 0.5258769960879882, + "grad_norm": 28.930753707885742, + "learning_rate": 5e-06, + "loss": 0.8525, + "num_input_tokens_seen": 258994852, + "step": 2050 + }, + { + "epoch": 0.5258769960879882, + "loss": 0.8064014911651611, + "loss_ce": 0.0012257290072739124, + "loss_iou": 0.376953125, + "loss_num": 0.00994873046875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 258994852, + "step": 2050 + }, + { + "epoch": 0.5261335214519336, + "grad_norm": 24.546236038208008, + "learning_rate": 5e-06, + "loss": 0.9749, + "num_input_tokens_seen": 259122016, + "step": 2051 + }, + { + "epoch": 0.5261335214519336, + "loss": 0.9845966100692749, + "loss_ce": 0.00022161187371239066, + "loss_iou": 0.455078125, + "loss_num": 0.014892578125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 259122016, + "step": 2051 + }, + { + "epoch": 0.5263900468158789, + "grad_norm": 35.74541091918945, + "learning_rate": 5e-06, + "loss": 1.0124, + "num_input_tokens_seen": 259247668, + "step": 2052 + }, + { + "epoch": 0.5263900468158789, + "loss": 1.080437183380127, + "loss_ce": 0.0003591269487515092, + "loss_iou": 0.4921875, + "loss_num": 0.0185546875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 259247668, + "step": 2052 + }, + { + "epoch": 0.5266465721798242, + "grad_norm": 49.129947662353516, + "learning_rate": 5e-06, + "loss": 1.1275, + "num_input_tokens_seen": 259374152, + "step": 2053 + }, + { + "epoch": 0.5266465721798242, + "loss": 1.292370319366455, + "loss_ce": 0.002331278519704938, + "loss_iou": 0.58984375, + "loss_num": 0.022705078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 259374152, + "step": 2053 + }, + { + "epoch": 0.5269030975437696, + "grad_norm": 55.699676513671875, + "learning_rate": 5e-06, + "loss": 1.2525, + "num_input_tokens_seen": 259500288, + "step": 2054 + }, + { + "epoch": 0.5269030975437696, + "loss": 1.3176875114440918, + "loss_ce": 0.002257848624140024, + "loss_iou": 0.5859375, + "loss_num": 0.0284423828125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 259500288, + "step": 2054 + }, + { + "epoch": 0.527159622907715, + "grad_norm": 70.89736938476562, + "learning_rate": 5e-06, + "loss": 1.1015, + "num_input_tokens_seen": 259626808, + "step": 2055 + }, + { + "epoch": 0.527159622907715, + "loss": 1.157155990600586, + "loss_ce": 0.003347366815432906, + "loss_iou": 0.53515625, + "loss_num": 0.0174560546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 259626808, + "step": 2055 + }, + { + "epoch": 0.5274161482716604, + "grad_norm": 79.22554016113281, + "learning_rate": 5e-06, + "loss": 1.1293, + "num_input_tokens_seen": 259752656, + "step": 2056 + }, + { + "epoch": 0.5274161482716604, + "loss": 1.118213176727295, + "loss_ce": 0.0005374052561819553, + "loss_iou": 0.515625, + "loss_num": 0.01806640625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 259752656, + "step": 2056 + }, + { + "epoch": 0.5276726736356058, + "grad_norm": 52.5753173828125, + "learning_rate": 5e-06, + "loss": 0.9561, + "num_input_tokens_seen": 259879636, + "step": 2057 + }, + { + "epoch": 0.5276726736356058, + "loss": 0.8082152605056763, + "loss_ce": 0.0015746058197692037, + "loss_iou": 0.38671875, + "loss_num": 0.006805419921875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 259879636, + "step": 2057 + }, + { + "epoch": 0.527929198999551, + "grad_norm": 72.6696548461914, + "learning_rate": 5e-06, + "loss": 1.1571, + "num_input_tokens_seen": 260006756, + "step": 2058 + }, + { + "epoch": 0.527929198999551, + "loss": 1.1130599975585938, + "loss_ce": 0.001243620296008885, + "loss_iou": 0.5078125, + "loss_num": 0.019287109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 260006756, + "step": 2058 + }, + { + "epoch": 0.5281857243634964, + "grad_norm": 58.3787956237793, + "learning_rate": 5e-06, + "loss": 0.9144, + "num_input_tokens_seen": 260133156, + "step": 2059 + }, + { + "epoch": 0.5281857243634964, + "loss": 0.7451873421669006, + "loss_ce": 7.017231837380677e-05, + "loss_iou": 0.357421875, + "loss_num": 0.00634765625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 260133156, + "step": 2059 + }, + { + "epoch": 0.5284422497274418, + "grad_norm": 47.23629379272461, + "learning_rate": 5e-06, + "loss": 1.0811, + "num_input_tokens_seen": 260259704, + "step": 2060 + }, + { + "epoch": 0.5284422497274418, + "loss": 1.0826222896575928, + "loss_ce": 0.0005910230102017522, + "loss_iou": 0.50390625, + "loss_num": 0.01507568359375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 260259704, + "step": 2060 + }, + { + "epoch": 0.5286987750913872, + "grad_norm": 45.07194900512695, + "learning_rate": 5e-06, + "loss": 1.0278, + "num_input_tokens_seen": 260385484, + "step": 2061 + }, + { + "epoch": 0.5286987750913872, + "loss": 1.2559621334075928, + "loss_ce": 0.005962177645415068, + "loss_iou": 0.56640625, + "loss_num": 0.023193359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 260385484, + "step": 2061 + }, + { + "epoch": 0.5289553004553326, + "grad_norm": 58.328887939453125, + "learning_rate": 5e-06, + "loss": 1.0639, + "num_input_tokens_seen": 260511584, + "step": 2062 + }, + { + "epoch": 0.5289553004553326, + "loss": 0.9388800859451294, + "loss_ce": 0.00040350109338760376, + "loss_iou": 0.4296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 260511584, + "step": 2062 + }, + { + "epoch": 0.5292118258192778, + "grad_norm": 80.0151596069336, + "learning_rate": 5e-06, + "loss": 1.0568, + "num_input_tokens_seen": 260638412, + "step": 2063 + }, + { + "epoch": 0.5292118258192778, + "loss": 1.0850272178649902, + "loss_ce": 0.0015310811577364802, + "loss_iou": 0.5078125, + "loss_num": 0.0130615234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 260638412, + "step": 2063 + }, + { + "epoch": 0.5294683511832232, + "grad_norm": 51.03153991699219, + "learning_rate": 5e-06, + "loss": 1.1857, + "num_input_tokens_seen": 260763976, + "step": 2064 + }, + { + "epoch": 0.5294683511832232, + "loss": 1.1942700147628784, + "loss_ce": 0.0035961430985480547, + "loss_iou": 0.53125, + "loss_num": 0.025634765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 260763976, + "step": 2064 + }, + { + "epoch": 0.5297248765471686, + "grad_norm": 36.663814544677734, + "learning_rate": 5e-06, + "loss": 0.9701, + "num_input_tokens_seen": 260889900, + "step": 2065 + }, + { + "epoch": 0.5297248765471686, + "loss": 0.9601141810417175, + "loss_ce": 0.0006415415555238724, + "loss_iou": 0.443359375, + "loss_num": 0.014404296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 260889900, + "step": 2065 + }, + { + "epoch": 0.529981401911114, + "grad_norm": 63.989768981933594, + "learning_rate": 5e-06, + "loss": 1.1121, + "num_input_tokens_seen": 261017176, + "step": 2066 + }, + { + "epoch": 0.529981401911114, + "loss": 1.0892438888549805, + "loss_ce": 0.003550530644133687, + "loss_iou": 0.48828125, + "loss_num": 0.021484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 261017176, + "step": 2066 + }, + { + "epoch": 0.5302379272750594, + "grad_norm": 43.2659797668457, + "learning_rate": 5e-06, + "loss": 1.067, + "num_input_tokens_seen": 261143296, + "step": 2067 + }, + { + "epoch": 0.5302379272750594, + "loss": 1.0381898880004883, + "loss_ce": 0.0005922214477322996, + "loss_iou": 0.474609375, + "loss_num": 0.017578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 261143296, + "step": 2067 + }, + { + "epoch": 0.5304944526390046, + "grad_norm": 24.66347312927246, + "learning_rate": 5e-06, + "loss": 1.0494, + "num_input_tokens_seen": 261269520, + "step": 2068 + }, + { + "epoch": 0.5304944526390046, + "loss": 1.1168646812438965, + "loss_ce": 0.001630280865356326, + "loss_iou": 0.515625, + "loss_num": 0.0169677734375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 261269520, + "step": 2068 + }, + { + "epoch": 0.53075097800295, + "grad_norm": 49.395694732666016, + "learning_rate": 5e-06, + "loss": 1.0559, + "num_input_tokens_seen": 261394532, + "step": 2069 + }, + { + "epoch": 0.53075097800295, + "loss": 1.1534628868103027, + "loss_ce": 0.0011192008387297392, + "loss_iou": 0.52734375, + "loss_num": 0.019287109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 261394532, + "step": 2069 + }, + { + "epoch": 0.5310075033668954, + "grad_norm": 48.912147521972656, + "learning_rate": 5e-06, + "loss": 1.1035, + "num_input_tokens_seen": 261521384, + "step": 2070 + }, + { + "epoch": 0.5310075033668954, + "loss": 0.9166247844696045, + "loss_ce": 0.00036501127760857344, + "loss_iou": 0.439453125, + "loss_num": 0.007781982421875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 261521384, + "step": 2070 + }, + { + "epoch": 0.5312640287308408, + "grad_norm": 30.840654373168945, + "learning_rate": 5e-06, + "loss": 1.0761, + "num_input_tokens_seen": 261647248, + "step": 2071 + }, + { + "epoch": 0.5312640287308408, + "loss": 1.0800025463104248, + "loss_ce": 0.00920168962329626, + "loss_iou": 0.48046875, + "loss_num": 0.02197265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 261647248, + "step": 2071 + }, + { + "epoch": 0.5315205540947862, + "grad_norm": 54.6402587890625, + "learning_rate": 5e-06, + "loss": 1.0997, + "num_input_tokens_seen": 261773372, + "step": 2072 + }, + { + "epoch": 0.5315205540947862, + "loss": 1.1462395191192627, + "loss_ce": 0.00024345166457351297, + "loss_iou": 0.5234375, + "loss_num": 0.0201416015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 261773372, + "step": 2072 + }, + { + "epoch": 0.5317770794587314, + "grad_norm": 50.31151580810547, + "learning_rate": 5e-06, + "loss": 0.9231, + "num_input_tokens_seen": 261898904, + "step": 2073 + }, + { + "epoch": 0.5317770794587314, + "loss": 1.0098406076431274, + "loss_ce": 0.000563219073228538, + "loss_iou": 0.462890625, + "loss_num": 0.0164794921875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 261898904, + "step": 2073 + }, + { + "epoch": 0.5320336048226768, + "grad_norm": 45.12799072265625, + "learning_rate": 5e-06, + "loss": 0.9652, + "num_input_tokens_seen": 262026204, + "step": 2074 + }, + { + "epoch": 0.5320336048226768, + "loss": 0.7232120037078857, + "loss_ce": 0.0007999389781616628, + "loss_iou": 0.33984375, + "loss_num": 0.008544921875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 262026204, + "step": 2074 + }, + { + "epoch": 0.5322901301866222, + "grad_norm": 45.372276306152344, + "learning_rate": 5e-06, + "loss": 0.9666, + "num_input_tokens_seen": 262151720, + "step": 2075 + }, + { + "epoch": 0.5322901301866222, + "loss": 0.8098355531692505, + "loss_ce": 0.0002652480616234243, + "loss_iou": 0.3828125, + "loss_num": 0.009033203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 262151720, + "step": 2075 + }, + { + "epoch": 0.5325466555505676, + "grad_norm": 31.510662078857422, + "learning_rate": 5e-06, + "loss": 0.931, + "num_input_tokens_seen": 262277756, + "step": 2076 + }, + { + "epoch": 0.5325466555505676, + "loss": 0.9901208877563477, + "loss_ce": 0.0003748129238374531, + "loss_iou": 0.447265625, + "loss_num": 0.0186767578125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 262277756, + "step": 2076 + }, + { + "epoch": 0.532803180914513, + "grad_norm": 38.17578887939453, + "learning_rate": 5e-06, + "loss": 1.0577, + "num_input_tokens_seen": 262404268, + "step": 2077 + }, + { + "epoch": 0.532803180914513, + "loss": 1.1061457395553589, + "loss_ce": 0.0016536276089027524, + "loss_iou": 0.50390625, + "loss_num": 0.01953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 262404268, + "step": 2077 + }, + { + "epoch": 0.5330597062784583, + "grad_norm": 53.984859466552734, + "learning_rate": 5e-06, + "loss": 1.0049, + "num_input_tokens_seen": 262530152, + "step": 2078 + }, + { + "epoch": 0.5330597062784583, + "loss": 1.0169166326522827, + "loss_ce": 0.003733081975951791, + "loss_iou": 0.482421875, + "loss_num": 0.00927734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 262530152, + "step": 2078 + }, + { + "epoch": 0.5333162316424036, + "grad_norm": 52.367210388183594, + "learning_rate": 5e-06, + "loss": 1.0207, + "num_input_tokens_seen": 262656448, + "step": 2079 + }, + { + "epoch": 0.5333162316424036, + "loss": 0.9975907206535339, + "loss_ce": 0.0005204096087254584, + "loss_iou": 0.46484375, + "loss_num": 0.01336669921875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 262656448, + "step": 2079 + }, + { + "epoch": 0.533572757006349, + "grad_norm": 83.12340545654297, + "learning_rate": 5e-06, + "loss": 0.9883, + "num_input_tokens_seen": 262783272, + "step": 2080 + }, + { + "epoch": 0.533572757006349, + "loss": 1.0387104749679565, + "loss_ce": 0.0016011069528758526, + "loss_iou": 0.4921875, + "loss_num": 0.01007080078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 262783272, + "step": 2080 + }, + { + "epoch": 0.5338292823702944, + "grad_norm": 49.9664421081543, + "learning_rate": 5e-06, + "loss": 1.0246, + "num_input_tokens_seen": 262910368, + "step": 2081 + }, + { + "epoch": 0.5338292823702944, + "loss": 1.1867914199829102, + "loss_ce": 0.006127288565039635, + "loss_iou": 0.51953125, + "loss_num": 0.02783203125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 262910368, + "step": 2081 + }, + { + "epoch": 0.5340858077342397, + "grad_norm": 49.16142272949219, + "learning_rate": 5e-06, + "loss": 1.038, + "num_input_tokens_seen": 263037104, + "step": 2082 + }, + { + "epoch": 0.5340858077342397, + "loss": 1.1410658359527588, + "loss_ce": 0.0004407914530020207, + "loss_iou": 0.5234375, + "loss_num": 0.01806640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 263037104, + "step": 2082 + }, + { + "epoch": 0.5343423330981851, + "grad_norm": 33.93173599243164, + "learning_rate": 5e-06, + "loss": 1.1512, + "num_input_tokens_seen": 263163012, + "step": 2083 + }, + { + "epoch": 0.5343423330981851, + "loss": 1.2177568674087524, + "loss_ce": 0.001936559216119349, + "loss_iou": 0.5546875, + "loss_num": 0.02099609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 263163012, + "step": 2083 + }, + { + "epoch": 0.5345988584621304, + "grad_norm": 39.54280471801758, + "learning_rate": 5e-06, + "loss": 1.0599, + "num_input_tokens_seen": 263289380, + "step": 2084 + }, + { + "epoch": 0.5345988584621304, + "loss": 1.0225932598114014, + "loss_ce": 0.00037638566573150456, + "loss_iou": 0.482421875, + "loss_num": 0.0115966796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 263289380, + "step": 2084 + }, + { + "epoch": 0.5348553838260758, + "grad_norm": 45.672367095947266, + "learning_rate": 5e-06, + "loss": 1.1694, + "num_input_tokens_seen": 263414968, + "step": 2085 + }, + { + "epoch": 0.5348553838260758, + "loss": 1.2934010028839111, + "loss_ce": 0.0009206320391967893, + "loss_iou": 0.58203125, + "loss_num": 0.026123046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 263414968, + "step": 2085 + }, + { + "epoch": 0.5351119091900212, + "grad_norm": 55.92557144165039, + "learning_rate": 5e-06, + "loss": 0.9758, + "num_input_tokens_seen": 263541816, + "step": 2086 + }, + { + "epoch": 0.5351119091900212, + "loss": 1.0558348894119263, + "loss_ce": 0.0011474149068817496, + "loss_iou": 0.4921875, + "loss_num": 0.0142822265625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 263541816, + "step": 2086 + }, + { + "epoch": 0.5353684345539665, + "grad_norm": 49.26450729370117, + "learning_rate": 5e-06, + "loss": 1.1182, + "num_input_tokens_seen": 263667964, + "step": 2087 + }, + { + "epoch": 0.5353684345539665, + "loss": 1.134372353553772, + "loss_ce": 9.501622116658837e-05, + "loss_iou": 0.5234375, + "loss_num": 0.0181884765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 263667964, + "step": 2087 + }, + { + "epoch": 0.5356249599179119, + "grad_norm": 72.46296691894531, + "learning_rate": 5e-06, + "loss": 0.9783, + "num_input_tokens_seen": 263794164, + "step": 2088 + }, + { + "epoch": 0.5356249599179119, + "loss": 1.0113887786865234, + "loss_ce": 0.0006465716869570315, + "loss_iou": 0.44921875, + "loss_num": 0.0224609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 263794164, + "step": 2088 + }, + { + "epoch": 0.5358814852818572, + "grad_norm": 47.740135192871094, + "learning_rate": 5e-06, + "loss": 1.0123, + "num_input_tokens_seen": 263921292, + "step": 2089 + }, + { + "epoch": 0.5358814852818572, + "loss": 1.0711369514465332, + "loss_ce": 0.0003361757844686508, + "loss_iou": 0.5, + "loss_num": 0.013671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 263921292, + "step": 2089 + }, + { + "epoch": 0.5361380106458026, + "grad_norm": 29.275177001953125, + "learning_rate": 5e-06, + "loss": 1.0673, + "num_input_tokens_seen": 264047640, + "step": 2090 + }, + { + "epoch": 0.5361380106458026, + "loss": 0.8927761316299438, + "loss_ce": 0.0001980327069759369, + "loss_iou": 0.41796875, + "loss_num": 0.01171875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 264047640, + "step": 2090 + }, + { + "epoch": 0.536394536009748, + "grad_norm": 37.165740966796875, + "learning_rate": 5e-06, + "loss": 0.9723, + "num_input_tokens_seen": 264174040, + "step": 2091 + }, + { + "epoch": 0.536394536009748, + "loss": 0.82999187707901, + "loss_ce": 0.0008903021225705743, + "loss_iou": 0.396484375, + "loss_num": 0.00750732421875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 264174040, + "step": 2091 + }, + { + "epoch": 0.5366510613736933, + "grad_norm": 40.464481353759766, + "learning_rate": 5e-06, + "loss": 0.9966, + "num_input_tokens_seen": 264299740, + "step": 2092 + }, + { + "epoch": 0.5366510613736933, + "loss": 0.9600374698638916, + "loss_ce": 0.0010531266452744603, + "loss_iou": 0.4453125, + "loss_num": 0.01324462890625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 264299740, + "step": 2092 + }, + { + "epoch": 0.5369075867376387, + "grad_norm": 50.954219818115234, + "learning_rate": 5e-06, + "loss": 1.005, + "num_input_tokens_seen": 264425748, + "step": 2093 + }, + { + "epoch": 0.5369075867376387, + "loss": 1.2320711612701416, + "loss_ce": 0.001114225946366787, + "loss_iou": 0.546875, + "loss_num": 0.0267333984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 264425748, + "step": 2093 + }, + { + "epoch": 0.537164112101584, + "grad_norm": 56.79198455810547, + "learning_rate": 5e-06, + "loss": 1.0554, + "num_input_tokens_seen": 264551524, + "step": 2094 + }, + { + "epoch": 0.537164112101584, + "loss": 1.1725119352340698, + "loss_ce": 0.0011252060066908598, + "loss_iou": 0.54296875, + "loss_num": 0.0179443359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 264551524, + "step": 2094 + }, + { + "epoch": 0.5374206374655294, + "grad_norm": 56.64177322387695, + "learning_rate": 5e-06, + "loss": 1.0224, + "num_input_tokens_seen": 264677548, + "step": 2095 + }, + { + "epoch": 0.5374206374655294, + "loss": 0.9041236639022827, + "loss_ce": 0.0017798690823838115, + "loss_iou": 0.421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 264677548, + "step": 2095 + }, + { + "epoch": 0.5376771628294748, + "grad_norm": 72.38475799560547, + "learning_rate": 5e-06, + "loss": 1.0472, + "num_input_tokens_seen": 264804268, + "step": 2096 + }, + { + "epoch": 0.5376771628294748, + "loss": 0.8928656578063965, + "loss_ce": 0.0012641348876059055, + "loss_iou": 0.427734375, + "loss_num": 0.007568359375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 264804268, + "step": 2096 + }, + { + "epoch": 0.5379336881934201, + "grad_norm": 53.33441925048828, + "learning_rate": 5e-06, + "loss": 1.0501, + "num_input_tokens_seen": 264930808, + "step": 2097 + }, + { + "epoch": 0.5379336881934201, + "loss": 1.0408732891082764, + "loss_ce": 0.0008341382490471005, + "loss_iou": 0.4765625, + "loss_num": 0.0172119140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 264930808, + "step": 2097 + }, + { + "epoch": 0.5381902135573655, + "grad_norm": 37.2518310546875, + "learning_rate": 5e-06, + "loss": 0.9148, + "num_input_tokens_seen": 265056252, + "step": 2098 + }, + { + "epoch": 0.5381902135573655, + "loss": 0.9820123910903931, + "loss_ce": 0.002031923271715641, + "loss_iou": 0.44921875, + "loss_num": 0.016845703125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 265056252, + "step": 2098 + }, + { + "epoch": 0.5384467389213109, + "grad_norm": 43.92380142211914, + "learning_rate": 5e-06, + "loss": 1.0086, + "num_input_tokens_seen": 265182280, + "step": 2099 + }, + { + "epoch": 0.5384467389213109, + "loss": 1.1232231855392456, + "loss_ce": 0.00017635090625844896, + "loss_iou": 0.51953125, + "loss_num": 0.0172119140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 265182280, + "step": 2099 + }, + { + "epoch": 0.5387032642852562, + "grad_norm": 46.41405487060547, + "learning_rate": 5e-06, + "loss": 1.0656, + "num_input_tokens_seen": 265307596, + "step": 2100 + }, + { + "epoch": 0.5387032642852562, + "loss": 0.9010828733444214, + "loss_ce": 0.00020397522894199938, + "loss_iou": 0.421875, + "loss_num": 0.0111083984375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 265307596, + "step": 2100 + }, + { + "epoch": 0.5389597896492015, + "grad_norm": 60.35308074951172, + "learning_rate": 5e-06, + "loss": 0.9506, + "num_input_tokens_seen": 265435036, + "step": 2101 + }, + { + "epoch": 0.5389597896492015, + "loss": 0.898063600063324, + "loss_ce": 0.00011439137597335503, + "loss_iou": 0.4296875, + "loss_num": 0.00799560546875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 265435036, + "step": 2101 + }, + { + "epoch": 0.5392163150131469, + "grad_norm": 67.0765609741211, + "learning_rate": 5e-06, + "loss": 1.0711, + "num_input_tokens_seen": 265561928, + "step": 2102 + }, + { + "epoch": 0.5392163150131469, + "loss": 1.253767967224121, + "loss_ce": 0.0008383337408304214, + "loss_iou": 0.546875, + "loss_num": 0.031494140625, + "loss_xval": 1.25, + "num_input_tokens_seen": 265561928, + "step": 2102 + }, + { + "epoch": 0.5394728403770923, + "grad_norm": 42.50312042236328, + "learning_rate": 5e-06, + "loss": 1.052, + "num_input_tokens_seen": 265688548, + "step": 2103 + }, + { + "epoch": 0.5394728403770923, + "loss": 1.1814442873001099, + "loss_ce": 0.0027332683093845844, + "loss_iou": 0.54296875, + "loss_num": 0.0179443359375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 265688548, + "step": 2103 + }, + { + "epoch": 0.5397293657410377, + "grad_norm": 36.89691162109375, + "learning_rate": 5e-06, + "loss": 0.8981, + "num_input_tokens_seen": 265815136, + "step": 2104 + }, + { + "epoch": 0.5397293657410377, + "loss": 0.7750831842422485, + "loss_ce": 0.0006691482849419117, + "loss_iou": 0.365234375, + "loss_num": 0.0089111328125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 265815136, + "step": 2104 + }, + { + "epoch": 0.539985891104983, + "grad_norm": 54.39409637451172, + "learning_rate": 5e-06, + "loss": 0.9647, + "num_input_tokens_seen": 265941888, + "step": 2105 + }, + { + "epoch": 0.539985891104983, + "loss": 0.8402824401855469, + "loss_ce": 0.0009269589791074395, + "loss_iou": 0.400390625, + "loss_num": 0.007781982421875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 265941888, + "step": 2105 + }, + { + "epoch": 0.5402424164689283, + "grad_norm": 94.0919418334961, + "learning_rate": 5e-06, + "loss": 0.948, + "num_input_tokens_seen": 266069588, + "step": 2106 + }, + { + "epoch": 0.5402424164689283, + "loss": 1.1261759996414185, + "loss_ce": 0.000687680090777576, + "loss_iou": 0.5078125, + "loss_num": 0.0211181640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 266069588, + "step": 2106 + }, + { + "epoch": 0.5404989418328737, + "grad_norm": 46.682376861572266, + "learning_rate": 5e-06, + "loss": 1.0133, + "num_input_tokens_seen": 266195316, + "step": 2107 + }, + { + "epoch": 0.5404989418328737, + "loss": 1.013458490371704, + "loss_ce": 0.002227941993623972, + "loss_iou": 0.470703125, + "loss_num": 0.01385498046875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 266195316, + "step": 2107 + }, + { + "epoch": 0.5407554671968191, + "grad_norm": 47.0839729309082, + "learning_rate": 5e-06, + "loss": 1.1385, + "num_input_tokens_seen": 266321136, + "step": 2108 + }, + { + "epoch": 0.5407554671968191, + "loss": 1.0807347297668457, + "loss_ce": 0.00016831718676257879, + "loss_iou": 0.49609375, + "loss_num": 0.017822265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 266321136, + "step": 2108 + }, + { + "epoch": 0.5410119925607645, + "grad_norm": 121.5799331665039, + "learning_rate": 5e-06, + "loss": 0.9561, + "num_input_tokens_seen": 266446684, + "step": 2109 + }, + { + "epoch": 0.5410119925607645, + "loss": 0.9339656829833984, + "loss_ce": 0.0020809448324143887, + "loss_iou": 0.42578125, + "loss_num": 0.0159912109375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 266446684, + "step": 2109 + }, + { + "epoch": 0.5412685179247098, + "grad_norm": 82.39623260498047, + "learning_rate": 5e-06, + "loss": 0.9443, + "num_input_tokens_seen": 266571880, + "step": 2110 + }, + { + "epoch": 0.5412685179247098, + "loss": 1.2025458812713623, + "loss_ce": 0.0018622784409672022, + "loss_iou": 0.5390625, + "loss_num": 0.02490234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 266571880, + "step": 2110 + }, + { + "epoch": 0.5415250432886551, + "grad_norm": 66.57891082763672, + "learning_rate": 5e-06, + "loss": 1.0593, + "num_input_tokens_seen": 266697428, + "step": 2111 + }, + { + "epoch": 0.5415250432886551, + "loss": 1.0473248958587646, + "loss_ce": 0.0028913640417158604, + "loss_iou": 0.48828125, + "loss_num": 0.01348876953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 266697428, + "step": 2111 + }, + { + "epoch": 0.5417815686526005, + "grad_norm": 53.899478912353516, + "learning_rate": 5e-06, + "loss": 1.1905, + "num_input_tokens_seen": 266823344, + "step": 2112 + }, + { + "epoch": 0.5417815686526005, + "loss": 1.0762308835983276, + "loss_ce": 0.0005472815246321261, + "loss_iou": 0.490234375, + "loss_num": 0.019287109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 266823344, + "step": 2112 + }, + { + "epoch": 0.5420380940165459, + "grad_norm": 64.96749877929688, + "learning_rate": 5e-06, + "loss": 0.9231, + "num_input_tokens_seen": 266949492, + "step": 2113 + }, + { + "epoch": 0.5420380940165459, + "loss": 0.9782789945602417, + "loss_ce": 0.0002516743843443692, + "loss_iou": 0.451171875, + "loss_num": 0.01483154296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 266949492, + "step": 2113 + }, + { + "epoch": 0.5422946193804913, + "grad_norm": 47.526214599609375, + "learning_rate": 5e-06, + "loss": 1.0716, + "num_input_tokens_seen": 267075680, + "step": 2114 + }, + { + "epoch": 0.5422946193804913, + "loss": 1.0502417087554932, + "loss_ce": 0.0023901346139609814, + "loss_iou": 0.48828125, + "loss_num": 0.0145263671875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 267075680, + "step": 2114 + }, + { + "epoch": 0.5425511447444366, + "grad_norm": 42.529056549072266, + "learning_rate": 5e-06, + "loss": 0.9864, + "num_input_tokens_seen": 267201856, + "step": 2115 + }, + { + "epoch": 0.5425511447444366, + "loss": 1.019618034362793, + "loss_ce": 0.0017958551179617643, + "loss_iou": 0.458984375, + "loss_num": 0.02001953125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 267201856, + "step": 2115 + }, + { + "epoch": 0.5428076701083819, + "grad_norm": 45.037940979003906, + "learning_rate": 5e-06, + "loss": 1.1219, + "num_input_tokens_seen": 267327800, + "step": 2116 + }, + { + "epoch": 0.5428076701083819, + "loss": 1.0114860534667969, + "loss_ce": 0.0007439473993144929, + "loss_iou": 0.474609375, + "loss_num": 0.01239013671875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 267327800, + "step": 2116 + }, + { + "epoch": 0.5430641954723273, + "grad_norm": 83.9783706665039, + "learning_rate": 5e-06, + "loss": 0.9942, + "num_input_tokens_seen": 267455312, + "step": 2117 + }, + { + "epoch": 0.5430641954723273, + "loss": 0.8030752539634705, + "loss_ce": 0.0008291855338029563, + "loss_iou": 0.380859375, + "loss_num": 0.00811767578125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 267455312, + "step": 2117 + }, + { + "epoch": 0.5433207208362727, + "grad_norm": 49.639915466308594, + "learning_rate": 5e-06, + "loss": 1.1809, + "num_input_tokens_seen": 267581432, + "step": 2118 + }, + { + "epoch": 0.5433207208362727, + "loss": 1.131028175354004, + "loss_ce": 0.00016879536269698292, + "loss_iou": 0.51171875, + "loss_num": 0.02099609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 267581432, + "step": 2118 + }, + { + "epoch": 0.5435772462002181, + "grad_norm": 38.80513381958008, + "learning_rate": 5e-06, + "loss": 0.9699, + "num_input_tokens_seen": 267708640, + "step": 2119 + }, + { + "epoch": 0.5435772462002181, + "loss": 0.8178186416625977, + "loss_ce": 0.0004358667938504368, + "loss_iou": 0.384765625, + "loss_num": 0.00970458984375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 267708640, + "step": 2119 + }, + { + "epoch": 0.5438337715641635, + "grad_norm": 39.514888763427734, + "learning_rate": 5e-06, + "loss": 0.9586, + "num_input_tokens_seen": 267835092, + "step": 2120 + }, + { + "epoch": 0.5438337715641635, + "loss": 0.9574585556983948, + "loss_ce": 0.0009155633742921054, + "loss_iou": 0.439453125, + "loss_num": 0.0152587890625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 267835092, + "step": 2120 + }, + { + "epoch": 0.5440902969281087, + "grad_norm": 34.515296936035156, + "learning_rate": 5e-06, + "loss": 1.0174, + "num_input_tokens_seen": 267961232, + "step": 2121 + }, + { + "epoch": 0.5440902969281087, + "loss": 1.189612865447998, + "loss_ce": 0.003821855876594782, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 267961232, + "step": 2121 + }, + { + "epoch": 0.5443468222920541, + "grad_norm": 38.70262145996094, + "learning_rate": 5e-06, + "loss": 1.121, + "num_input_tokens_seen": 268088204, + "step": 2122 + }, + { + "epoch": 0.5443468222920541, + "loss": 0.9582158923149109, + "loss_ce": 0.00020808368572033942, + "loss_iou": 0.44921875, + "loss_num": 0.0115966796875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 268088204, + "step": 2122 + }, + { + "epoch": 0.5446033476559995, + "grad_norm": 61.40644073486328, + "learning_rate": 5e-06, + "loss": 1.0146, + "num_input_tokens_seen": 268213664, + "step": 2123 + }, + { + "epoch": 0.5446033476559995, + "loss": 0.96445631980896, + "loss_ce": 0.001077447202987969, + "loss_iou": 0.462890625, + "loss_num": 0.007293701171875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 268213664, + "step": 2123 + }, + { + "epoch": 0.5448598730199449, + "grad_norm": 70.07954406738281, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 268338852, + "step": 2124 + }, + { + "epoch": 0.5448598730199449, + "loss": 0.8717406988143921, + "loss_ce": 0.00015870352217461914, + "loss_iou": 0.400390625, + "loss_num": 0.0137939453125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 268338852, + "step": 2124 + }, + { + "epoch": 0.5451163983838903, + "grad_norm": 55.85931396484375, + "learning_rate": 5e-06, + "loss": 1.137, + "num_input_tokens_seen": 268464968, + "step": 2125 + }, + { + "epoch": 0.5451163983838903, + "loss": 1.2699817419052124, + "loss_ce": 0.0024035810492932796, + "loss_iou": 0.55859375, + "loss_num": 0.02978515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 268464968, + "step": 2125 + }, + { + "epoch": 0.5453729237478355, + "grad_norm": 44.16478729248047, + "learning_rate": 5e-06, + "loss": 0.9297, + "num_input_tokens_seen": 268590184, + "step": 2126 + }, + { + "epoch": 0.5453729237478355, + "loss": 0.9815386533737183, + "loss_ce": 0.0005816075135953724, + "loss_iou": 0.45703125, + "loss_num": 0.01361083984375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 268590184, + "step": 2126 + }, + { + "epoch": 0.5456294491117809, + "grad_norm": 47.63579559326172, + "learning_rate": 5e-06, + "loss": 1.019, + "num_input_tokens_seen": 268716004, + "step": 2127 + }, + { + "epoch": 0.5456294491117809, + "loss": 0.8909671902656555, + "loss_ce": 0.0013187576550990343, + "loss_iou": 0.42578125, + "loss_num": 0.007781982421875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 268716004, + "step": 2127 + }, + { + "epoch": 0.5458859744757263, + "grad_norm": 55.660037994384766, + "learning_rate": 5e-06, + "loss": 1.039, + "num_input_tokens_seen": 268841724, + "step": 2128 + }, + { + "epoch": 0.5458859744757263, + "loss": 0.857921302318573, + "loss_ce": 0.000255302875302732, + "loss_iou": 0.408203125, + "loss_num": 0.008056640625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 268841724, + "step": 2128 + }, + { + "epoch": 0.5461424998396717, + "grad_norm": 55.69803237915039, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 268968076, + "step": 2129 + }, + { + "epoch": 0.5461424998396717, + "loss": 0.8947121500968933, + "loss_ce": 0.0006691922899335623, + "loss_iou": 0.41015625, + "loss_num": 0.01434326171875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 268968076, + "step": 2129 + }, + { + "epoch": 0.546399025203617, + "grad_norm": 58.61482620239258, + "learning_rate": 5e-06, + "loss": 0.9976, + "num_input_tokens_seen": 269093888, + "step": 2130 + }, + { + "epoch": 0.546399025203617, + "loss": 0.8794206976890564, + "loss_ce": 0.0005144798778928816, + "loss_iou": 0.404296875, + "loss_num": 0.01416015625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 269093888, + "step": 2130 + }, + { + "epoch": 0.5466555505675623, + "grad_norm": 55.62767791748047, + "learning_rate": 5e-06, + "loss": 1.021, + "num_input_tokens_seen": 269219476, + "step": 2131 + }, + { + "epoch": 0.5466555505675623, + "loss": 1.015055537223816, + "loss_ce": 0.0004070880531799048, + "loss_iou": 0.4609375, + "loss_num": 0.0184326171875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 269219476, + "step": 2131 + }, + { + "epoch": 0.5469120759315077, + "grad_norm": 58.013999938964844, + "learning_rate": 5e-06, + "loss": 1.0366, + "num_input_tokens_seen": 269345816, + "step": 2132 + }, + { + "epoch": 0.5469120759315077, + "loss": 0.8907912373542786, + "loss_ce": 0.0016310925129801035, + "loss_iou": 0.421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 269345816, + "step": 2132 + }, + { + "epoch": 0.5471686012954531, + "grad_norm": 53.57046127319336, + "learning_rate": 5e-06, + "loss": 0.9389, + "num_input_tokens_seen": 269471828, + "step": 2133 + }, + { + "epoch": 0.5471686012954531, + "loss": 0.962139368057251, + "loss_ce": 0.00022528968111146241, + "loss_iou": 0.45703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 269471828, + "step": 2133 + }, + { + "epoch": 0.5474251266593985, + "grad_norm": 61.3003044128418, + "learning_rate": 5e-06, + "loss": 0.932, + "num_input_tokens_seen": 269598460, + "step": 2134 + }, + { + "epoch": 0.5474251266593985, + "loss": 1.0221776962280273, + "loss_ce": 0.0006932877004146576, + "loss_iou": 0.48828125, + "loss_num": 0.0086669921875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 269598460, + "step": 2134 + }, + { + "epoch": 0.5476816520233438, + "grad_norm": 78.01315307617188, + "learning_rate": 5e-06, + "loss": 1.0817, + "num_input_tokens_seen": 269725080, + "step": 2135 + }, + { + "epoch": 0.5476816520233438, + "loss": 1.1220242977142334, + "loss_ce": 0.008743060752749443, + "loss_iou": 0.51171875, + "loss_num": 0.017333984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 269725080, + "step": 2135 + }, + { + "epoch": 0.5479381773872891, + "grad_norm": 34.92867660522461, + "learning_rate": 5e-06, + "loss": 1.1083, + "num_input_tokens_seen": 269852228, + "step": 2136 + }, + { + "epoch": 0.5479381773872891, + "loss": 1.1481385231018066, + "loss_ce": 0.0021424058359116316, + "loss_iou": 0.50390625, + "loss_num": 0.0283203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 269852228, + "step": 2136 + }, + { + "epoch": 0.5481947027512345, + "grad_norm": 33.52834701538086, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 269978028, + "step": 2137 + }, + { + "epoch": 0.5481947027512345, + "loss": 0.9784095287322998, + "loss_ce": 0.00038218265399336815, + "loss_iou": 0.458984375, + "loss_num": 0.011962890625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 269978028, + "step": 2137 + }, + { + "epoch": 0.5484512281151799, + "grad_norm": 49.01051330566406, + "learning_rate": 5e-06, + "loss": 1.0676, + "num_input_tokens_seen": 270102788, + "step": 2138 + }, + { + "epoch": 0.5484512281151799, + "loss": 1.2084213495254517, + "loss_ce": 0.003343298565596342, + "loss_iou": 0.5390625, + "loss_num": 0.0257568359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 270102788, + "step": 2138 + }, + { + "epoch": 0.5487077534791253, + "grad_norm": 56.64397430419922, + "learning_rate": 5e-06, + "loss": 0.9598, + "num_input_tokens_seen": 270230132, + "step": 2139 + }, + { + "epoch": 0.5487077534791253, + "loss": 0.8916265368461609, + "loss_ce": 0.000757417525164783, + "loss_iou": 0.404296875, + "loss_num": 0.016357421875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 270230132, + "step": 2139 + }, + { + "epoch": 0.5489642788430706, + "grad_norm": 66.61978912353516, + "learning_rate": 5e-06, + "loss": 1.0814, + "num_input_tokens_seen": 270355536, + "step": 2140 + }, + { + "epoch": 0.5489642788430706, + "loss": 1.0252015590667725, + "loss_ce": 0.0002991966321133077, + "loss_iou": 0.46484375, + "loss_num": 0.019287109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 270355536, + "step": 2140 + }, + { + "epoch": 0.549220804207016, + "grad_norm": 52.71894073486328, + "learning_rate": 5e-06, + "loss": 1.0015, + "num_input_tokens_seen": 270482284, + "step": 2141 + }, + { + "epoch": 0.549220804207016, + "loss": 0.8125232458114624, + "loss_ce": 0.0027088166680186987, + "loss_iou": 0.390625, + "loss_num": 0.005889892578125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 270482284, + "step": 2141 + }, + { + "epoch": 0.5494773295709613, + "grad_norm": 58.62114715576172, + "learning_rate": 5e-06, + "loss": 1.004, + "num_input_tokens_seen": 270607936, + "step": 2142 + }, + { + "epoch": 0.5494773295709613, + "loss": 0.9550107717514038, + "loss_ce": 0.0004209047183394432, + "loss_iou": 0.447265625, + "loss_num": 0.01202392578125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 270607936, + "step": 2142 + }, + { + "epoch": 0.5497338549349067, + "grad_norm": 56.06010437011719, + "learning_rate": 5e-06, + "loss": 1.009, + "num_input_tokens_seen": 270735312, + "step": 2143 + }, + { + "epoch": 0.5497338549349067, + "loss": 1.1710991859436035, + "loss_ce": 0.0006889746291562915, + "loss_iou": 0.51171875, + "loss_num": 0.0294189453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 270735312, + "step": 2143 + }, + { + "epoch": 0.549990380298852, + "grad_norm": 81.80115509033203, + "learning_rate": 5e-06, + "loss": 0.8764, + "num_input_tokens_seen": 270862356, + "step": 2144 + }, + { + "epoch": 0.549990380298852, + "loss": 0.8314216136932373, + "loss_ce": 0.00036692360299639404, + "loss_iou": 0.3984375, + "loss_num": 0.007232666015625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 270862356, + "step": 2144 + }, + { + "epoch": 0.5502469056627974, + "grad_norm": 51.825828552246094, + "learning_rate": 5e-06, + "loss": 1.0258, + "num_input_tokens_seen": 270988392, + "step": 2145 + }, + { + "epoch": 0.5502469056627974, + "loss": 1.1485607624053955, + "loss_ce": 0.0006115525029599667, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 270988392, + "step": 2145 + }, + { + "epoch": 0.5505034310267428, + "grad_norm": 28.81943130493164, + "learning_rate": 5e-06, + "loss": 1.0492, + "num_input_tokens_seen": 271114836, + "step": 2146 + }, + { + "epoch": 0.5505034310267428, + "loss": 1.1482787132263184, + "loss_ce": 0.0008176739793270826, + "loss_iou": 0.5234375, + "loss_num": 0.02099609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 271114836, + "step": 2146 + }, + { + "epoch": 0.5507599563906881, + "grad_norm": 61.013492584228516, + "learning_rate": 5e-06, + "loss": 0.9869, + "num_input_tokens_seen": 271241620, + "step": 2147 + }, + { + "epoch": 0.5507599563906881, + "loss": 1.1906027793884277, + "loss_ce": 0.001637935172766447, + "loss_iou": 0.5390625, + "loss_num": 0.0213623046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 271241620, + "step": 2147 + }, + { + "epoch": 0.5510164817546335, + "grad_norm": 36.55144500732422, + "learning_rate": 5e-06, + "loss": 0.9661, + "num_input_tokens_seen": 271367496, + "step": 2148 + }, + { + "epoch": 0.5510164817546335, + "loss": 1.0258122682571411, + "loss_ce": 0.003107182215899229, + "loss_iou": 0.46875, + "loss_num": 0.0169677734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 271367496, + "step": 2148 + }, + { + "epoch": 0.5512730071185789, + "grad_norm": 54.136756896972656, + "learning_rate": 5e-06, + "loss": 1.0658, + "num_input_tokens_seen": 271492988, + "step": 2149 + }, + { + "epoch": 0.5512730071185789, + "loss": 1.2213716506958008, + "loss_ce": 0.0011567166075110435, + "loss_iou": 0.5390625, + "loss_num": 0.0289306640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 271492988, + "step": 2149 + }, + { + "epoch": 0.5515295324825242, + "grad_norm": 47.745819091796875, + "learning_rate": 5e-06, + "loss": 1.074, + "num_input_tokens_seen": 271619140, + "step": 2150 + }, + { + "epoch": 0.5515295324825242, + "loss": 1.0508285760879517, + "loss_ce": 0.001512161223217845, + "loss_iou": 0.478515625, + "loss_num": 0.0185546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 271619140, + "step": 2150 + }, + { + "epoch": 0.5517860578464696, + "grad_norm": 44.93907928466797, + "learning_rate": 5e-06, + "loss": 0.9402, + "num_input_tokens_seen": 271746040, + "step": 2151 + }, + { + "epoch": 0.5517860578464696, + "loss": 1.0632290840148926, + "loss_ce": 0.0021939321886748075, + "loss_iou": 0.47265625, + "loss_num": 0.023681640625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 271746040, + "step": 2151 + }, + { + "epoch": 0.5520425832104149, + "grad_norm": 45.39997863769531, + "learning_rate": 5e-06, + "loss": 1.0234, + "num_input_tokens_seen": 271871752, + "step": 2152 + }, + { + "epoch": 0.5520425832104149, + "loss": 1.1427688598632812, + "loss_ce": 0.0016555471811443567, + "loss_iou": 0.5078125, + "loss_num": 0.0245361328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 271871752, + "step": 2152 + }, + { + "epoch": 0.5522991085743603, + "grad_norm": 37.655242919921875, + "learning_rate": 5e-06, + "loss": 0.9044, + "num_input_tokens_seen": 271998424, + "step": 2153 + }, + { + "epoch": 0.5522991085743603, + "loss": 0.8761508464813232, + "loss_ce": 0.0006625698879361153, + "loss_iou": 0.41015625, + "loss_num": 0.01165771484375, + "loss_xval": 0.875, + "num_input_tokens_seen": 271998424, + "step": 2153 + }, + { + "epoch": 0.5525556339383056, + "grad_norm": 44.13722610473633, + "learning_rate": 5e-06, + "loss": 0.9682, + "num_input_tokens_seen": 272124068, + "step": 2154 + }, + { + "epoch": 0.5525556339383056, + "loss": 1.0537773370742798, + "loss_ce": 0.001531238667666912, + "loss_iou": 0.490234375, + "loss_num": 0.0140380859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 272124068, + "step": 2154 + }, + { + "epoch": 0.552812159302251, + "grad_norm": 69.09748840332031, + "learning_rate": 5e-06, + "loss": 1.0137, + "num_input_tokens_seen": 272250312, + "step": 2155 + }, + { + "epoch": 0.552812159302251, + "loss": 1.1132103204727173, + "loss_ce": 0.0013938801130279899, + "loss_iou": 0.50390625, + "loss_num": 0.0208740234375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 272250312, + "step": 2155 + }, + { + "epoch": 0.5530686846661964, + "grad_norm": 46.14149856567383, + "learning_rate": 5e-06, + "loss": 1.0609, + "num_input_tokens_seen": 272376844, + "step": 2156 + }, + { + "epoch": 0.5530686846661964, + "loss": 0.9626413583755493, + "loss_ce": 0.0012156126322224736, + "loss_iou": 0.453125, + "loss_num": 0.01104736328125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 272376844, + "step": 2156 + }, + { + "epoch": 0.5533252100301417, + "grad_norm": 38.04602813720703, + "learning_rate": 5e-06, + "loss": 1.0121, + "num_input_tokens_seen": 272503132, + "step": 2157 + }, + { + "epoch": 0.5533252100301417, + "loss": 0.9741703271865845, + "loss_ce": 0.0020023779943585396, + "loss_iou": 0.46484375, + "loss_num": 0.0086669921875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 272503132, + "step": 2157 + }, + { + "epoch": 0.5535817353940871, + "grad_norm": 65.8387680053711, + "learning_rate": 5e-06, + "loss": 1.0699, + "num_input_tokens_seen": 272629612, + "step": 2158 + }, + { + "epoch": 0.5535817353940871, + "loss": 0.9537357091903687, + "loss_ce": 0.0006106970831751823, + "loss_iou": 0.44140625, + "loss_num": 0.0142822265625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 272629612, + "step": 2158 + }, + { + "epoch": 0.5538382607580324, + "grad_norm": 46.574363708496094, + "learning_rate": 5e-06, + "loss": 1.1261, + "num_input_tokens_seen": 272755824, + "step": 2159 + }, + { + "epoch": 0.5538382607580324, + "loss": 1.1293259859085083, + "loss_ce": 0.0013962624361738563, + "loss_iou": 0.53515625, + "loss_num": 0.0113525390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 272755824, + "step": 2159 + }, + { + "epoch": 0.5540947861219778, + "grad_norm": 19.270292282104492, + "learning_rate": 5e-06, + "loss": 0.9316, + "num_input_tokens_seen": 272882396, + "step": 2160 + }, + { + "epoch": 0.5540947861219778, + "loss": 0.8904005289077759, + "loss_ce": 0.00026382392388768494, + "loss_iou": 0.416015625, + "loss_num": 0.0115966796875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 272882396, + "step": 2160 + }, + { + "epoch": 0.5543513114859232, + "grad_norm": 49.630828857421875, + "learning_rate": 5e-06, + "loss": 0.9658, + "num_input_tokens_seen": 273007604, + "step": 2161 + }, + { + "epoch": 0.5543513114859232, + "loss": 0.9311999082565308, + "loss_ce": 0.0010241307318210602, + "loss_iou": 0.44140625, + "loss_num": 0.009521484375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 273007604, + "step": 2161 + }, + { + "epoch": 0.5546078368498686, + "grad_norm": 48.589176177978516, + "learning_rate": 5e-06, + "loss": 0.9535, + "num_input_tokens_seen": 273133924, + "step": 2162 + }, + { + "epoch": 0.5546078368498686, + "loss": 1.0551153421401978, + "loss_ce": 0.0014043827541172504, + "loss_iou": 0.470703125, + "loss_num": 0.022216796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 273133924, + "step": 2162 + }, + { + "epoch": 0.5548643622138139, + "grad_norm": 72.10095977783203, + "learning_rate": 5e-06, + "loss": 1.0516, + "num_input_tokens_seen": 273259956, + "step": 2163 + }, + { + "epoch": 0.5548643622138139, + "loss": 1.0538641214370728, + "loss_ce": 0.0030828583985567093, + "loss_iou": 0.48046875, + "loss_num": 0.018310546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 273259956, + "step": 2163 + }, + { + "epoch": 0.5551208875777592, + "grad_norm": 45.352169036865234, + "learning_rate": 5e-06, + "loss": 1.1766, + "num_input_tokens_seen": 273386328, + "step": 2164 + }, + { + "epoch": 0.5551208875777592, + "loss": 1.4658281803131104, + "loss_ce": 0.004402323625981808, + "loss_iou": 0.62890625, + "loss_num": 0.04150390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 273386328, + "step": 2164 + }, + { + "epoch": 0.5553774129417046, + "grad_norm": 43.19377136230469, + "learning_rate": 5e-06, + "loss": 0.9162, + "num_input_tokens_seen": 273512804, + "step": 2165 + }, + { + "epoch": 0.5553774129417046, + "loss": 0.9367334246635437, + "loss_ce": 0.0002100106212310493, + "loss_iou": 0.4453125, + "loss_num": 0.00927734375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 273512804, + "step": 2165 + }, + { + "epoch": 0.55563393830565, + "grad_norm": 60.92735290527344, + "learning_rate": 5e-06, + "loss": 0.9659, + "num_input_tokens_seen": 273638044, + "step": 2166 + }, + { + "epoch": 0.55563393830565, + "loss": 0.9743404388427734, + "loss_ce": 0.0007076432812027633, + "loss_iou": 0.462890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 273638044, + "step": 2166 + }, + { + "epoch": 0.5558904636695954, + "grad_norm": 43.77385330200195, + "learning_rate": 5e-06, + "loss": 0.9745, + "num_input_tokens_seen": 273763268, + "step": 2167 + }, + { + "epoch": 0.5558904636695954, + "loss": 1.0535348653793335, + "loss_ce": 0.0003121637855656445, + "loss_iou": 0.46875, + "loss_num": 0.02294921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 273763268, + "step": 2167 + }, + { + "epoch": 0.5561469890335407, + "grad_norm": 31.93846321105957, + "learning_rate": 5e-06, + "loss": 1.0314, + "num_input_tokens_seen": 273890128, + "step": 2168 + }, + { + "epoch": 0.5561469890335407, + "loss": 1.172947883605957, + "loss_ce": 0.0025377371348440647, + "loss_iou": 0.51953125, + "loss_num": 0.0260009765625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 273890128, + "step": 2168 + }, + { + "epoch": 0.556403514397486, + "grad_norm": 37.651084899902344, + "learning_rate": 5e-06, + "loss": 1.0237, + "num_input_tokens_seen": 274016404, + "step": 2169 + }, + { + "epoch": 0.556403514397486, + "loss": 0.9080225825309753, + "loss_ce": 0.0037257422227412462, + "loss_iou": 0.4140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 274016404, + "step": 2169 + }, + { + "epoch": 0.5566600397614314, + "grad_norm": 56.58539962768555, + "learning_rate": 5e-06, + "loss": 0.9609, + "num_input_tokens_seen": 274143596, + "step": 2170 + }, + { + "epoch": 0.5566600397614314, + "loss": 0.8073244094848633, + "loss_ce": 0.00019550076103769243, + "loss_iou": 0.388671875, + "loss_num": 0.006011962890625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 274143596, + "step": 2170 + }, + { + "epoch": 0.5569165651253768, + "grad_norm": 51.7929801940918, + "learning_rate": 5e-06, + "loss": 1.021, + "num_input_tokens_seen": 274269456, + "step": 2171 + }, + { + "epoch": 0.5569165651253768, + "loss": 1.1503279209136963, + "loss_ce": 0.0023787422105669975, + "loss_iou": 0.52734375, + "loss_num": 0.0191650390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 274269456, + "step": 2171 + }, + { + "epoch": 0.5571730904893222, + "grad_norm": 46.7864875793457, + "learning_rate": 5e-06, + "loss": 0.9114, + "num_input_tokens_seen": 274396188, + "step": 2172 + }, + { + "epoch": 0.5571730904893222, + "loss": 0.9158524870872498, + "loss_ce": 0.0003251858288422227, + "loss_iou": 0.42578125, + "loss_num": 0.01239013671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 274396188, + "step": 2172 + }, + { + "epoch": 0.5574296158532674, + "grad_norm": 68.5341567993164, + "learning_rate": 5e-06, + "loss": 1.082, + "num_input_tokens_seen": 274522148, + "step": 2173 + }, + { + "epoch": 0.5574296158532674, + "loss": 0.9882373213768005, + "loss_ce": 0.00044439019984565675, + "loss_iou": 0.466796875, + "loss_num": 0.010498046875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 274522148, + "step": 2173 + }, + { + "epoch": 0.5576861412172128, + "grad_norm": 57.13827896118164, + "learning_rate": 5e-06, + "loss": 1.1023, + "num_input_tokens_seen": 274648868, + "step": 2174 + }, + { + "epoch": 0.5576861412172128, + "loss": 0.9503019452095032, + "loss_ce": 0.0001066099212039262, + "loss_iou": 0.44921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 274648868, + "step": 2174 + }, + { + "epoch": 0.5579426665811582, + "grad_norm": 51.09037399291992, + "learning_rate": 5e-06, + "loss": 0.9284, + "num_input_tokens_seen": 274775484, + "step": 2175 + }, + { + "epoch": 0.5579426665811582, + "loss": 0.9014650583267212, + "loss_ce": 0.0010743903694674373, + "loss_iou": 0.416015625, + "loss_num": 0.0135498046875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 274775484, + "step": 2175 + }, + { + "epoch": 0.5581991919451036, + "grad_norm": 60.436851501464844, + "learning_rate": 5e-06, + "loss": 1.1312, + "num_input_tokens_seen": 274901408, + "step": 2176 + }, + { + "epoch": 0.5581991919451036, + "loss": 1.2494313716888428, + "loss_ce": 0.003825840540230274, + "loss_iou": 0.5546875, + "loss_num": 0.027587890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 274901408, + "step": 2176 + }, + { + "epoch": 0.558455717309049, + "grad_norm": 50.890113830566406, + "learning_rate": 5e-06, + "loss": 1.1262, + "num_input_tokens_seen": 275027152, + "step": 2177 + }, + { + "epoch": 0.558455717309049, + "loss": 1.0987927913665771, + "loss_ce": 0.001136508770287037, + "loss_iou": 0.51171875, + "loss_num": 0.01513671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 275027152, + "step": 2177 + }, + { + "epoch": 0.5587122426729942, + "grad_norm": 105.37041473388672, + "learning_rate": 5e-06, + "loss": 0.9344, + "num_input_tokens_seen": 275153912, + "step": 2178 + }, + { + "epoch": 0.5587122426729942, + "loss": 0.9479577541351318, + "loss_ce": 0.0016686981543898582, + "loss_iou": 0.427734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 275153912, + "step": 2178 + }, + { + "epoch": 0.5589687680369396, + "grad_norm": 343.10333251953125, + "learning_rate": 5e-06, + "loss": 1.1029, + "num_input_tokens_seen": 275280132, + "step": 2179 + }, + { + "epoch": 0.5589687680369396, + "loss": 1.0131373405456543, + "loss_ce": 0.0028834636323153973, + "loss_iou": 0.46484375, + "loss_num": 0.0164794921875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 275280132, + "step": 2179 + }, + { + "epoch": 0.559225293400885, + "grad_norm": 56.179229736328125, + "learning_rate": 5e-06, + "loss": 1.0206, + "num_input_tokens_seen": 275406608, + "step": 2180 + }, + { + "epoch": 0.559225293400885, + "loss": 1.0449639558792114, + "loss_ce": 0.0010185850551351905, + "loss_iou": 0.48828125, + "loss_num": 0.0137939453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 275406608, + "step": 2180 + }, + { + "epoch": 0.5594818187648304, + "grad_norm": 57.50232696533203, + "learning_rate": 5e-06, + "loss": 0.8696, + "num_input_tokens_seen": 275533088, + "step": 2181 + }, + { + "epoch": 0.5594818187648304, + "loss": 0.8450495004653931, + "loss_ce": 0.0003229244612157345, + "loss_iou": 0.40625, + "loss_num": 0.00653076171875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 275533088, + "step": 2181 + }, + { + "epoch": 0.5597383441287758, + "grad_norm": 51.98136901855469, + "learning_rate": 5e-06, + "loss": 0.9708, + "num_input_tokens_seen": 275659484, + "step": 2182 + }, + { + "epoch": 0.5597383441287758, + "loss": 0.9771950244903564, + "loss_ce": 0.00014423337415792048, + "loss_iou": 0.453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 275659484, + "step": 2182 + }, + { + "epoch": 0.5599948694927211, + "grad_norm": 51.27824020385742, + "learning_rate": 5e-06, + "loss": 0.9906, + "num_input_tokens_seen": 275785536, + "step": 2183 + }, + { + "epoch": 0.5599948694927211, + "loss": 1.0937823057174683, + "loss_ce": 0.0014971477212384343, + "loss_iou": 0.49609375, + "loss_num": 0.0201416015625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 275785536, + "step": 2183 + }, + { + "epoch": 0.5602513948566664, + "grad_norm": 47.09531021118164, + "learning_rate": 5e-06, + "loss": 1.096, + "num_input_tokens_seen": 275911636, + "step": 2184 + }, + { + "epoch": 0.5602513948566664, + "loss": 1.093801736831665, + "loss_ce": 0.005422751419246197, + "loss_iou": 0.490234375, + "loss_num": 0.0213623046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 275911636, + "step": 2184 + }, + { + "epoch": 0.5605079202206118, + "grad_norm": 50.552345275878906, + "learning_rate": 5e-06, + "loss": 0.982, + "num_input_tokens_seen": 276037480, + "step": 2185 + }, + { + "epoch": 0.5605079202206118, + "loss": 0.9279925227165222, + "loss_ce": 0.005140956491231918, + "loss_iou": 0.412109375, + "loss_num": 0.0201416015625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 276037480, + "step": 2185 + }, + { + "epoch": 0.5607644455845572, + "grad_norm": 61.35932922363281, + "learning_rate": 5e-06, + "loss": 1.0826, + "num_input_tokens_seen": 276164152, + "step": 2186 + }, + { + "epoch": 0.5607644455845572, + "loss": 1.094228982925415, + "loss_ce": 0.0009671769803389907, + "loss_iou": 0.51171875, + "loss_num": 0.013427734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 276164152, + "step": 2186 + }, + { + "epoch": 0.5610209709485026, + "grad_norm": 51.68601608276367, + "learning_rate": 5e-06, + "loss": 1.0254, + "num_input_tokens_seen": 276289668, + "step": 2187 + }, + { + "epoch": 0.5610209709485026, + "loss": 1.2297756671905518, + "loss_ce": 0.0002835007035173476, + "loss_iou": 0.5390625, + "loss_num": 0.030029296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 276289668, + "step": 2187 + }, + { + "epoch": 0.5612774963124479, + "grad_norm": 46.56258773803711, + "learning_rate": 5e-06, + "loss": 1.1104, + "num_input_tokens_seen": 276415748, + "step": 2188 + }, + { + "epoch": 0.5612774963124479, + "loss": 1.1846972703933716, + "loss_ce": 0.001103570917621255, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 276415748, + "step": 2188 + }, + { + "epoch": 0.5615340216763932, + "grad_norm": 41.080543518066406, + "learning_rate": 5e-06, + "loss": 1.0198, + "num_input_tokens_seen": 276541088, + "step": 2189 + }, + { + "epoch": 0.5615340216763932, + "loss": 0.909066379070282, + "loss_ce": 0.003304713172838092, + "loss_iou": 0.4140625, + "loss_num": 0.015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 276541088, + "step": 2189 + }, + { + "epoch": 0.5617905470403386, + "grad_norm": 468.84033203125, + "learning_rate": 5e-06, + "loss": 0.9918, + "num_input_tokens_seen": 276668488, + "step": 2190 + }, + { + "epoch": 0.5617905470403386, + "loss": 0.8983898758888245, + "loss_ce": 0.0009289373410865664, + "loss_iou": 0.419921875, + "loss_num": 0.01202392578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 276668488, + "step": 2190 + }, + { + "epoch": 0.562047072404284, + "grad_norm": 75.52664184570312, + "learning_rate": 5e-06, + "loss": 1.0746, + "num_input_tokens_seen": 276795360, + "step": 2191 + }, + { + "epoch": 0.562047072404284, + "loss": 0.9311919212341309, + "loss_ce": 0.000527857628185302, + "loss_iou": 0.4375, + "loss_num": 0.0113525390625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 276795360, + "step": 2191 + }, + { + "epoch": 0.5623035977682294, + "grad_norm": 53.39921569824219, + "learning_rate": 5e-06, + "loss": 1.1318, + "num_input_tokens_seen": 276921932, + "step": 2192 + }, + { + "epoch": 0.5623035977682294, + "loss": 1.255314588546753, + "loss_ce": 0.0009199911146424711, + "loss_iou": 0.5859375, + "loss_num": 0.0167236328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 276921932, + "step": 2192 + }, + { + "epoch": 0.5625601231321747, + "grad_norm": 57.0644645690918, + "learning_rate": 5e-06, + "loss": 0.9941, + "num_input_tokens_seen": 277047356, + "step": 2193 + }, + { + "epoch": 0.5625601231321747, + "loss": 1.1586862802505493, + "loss_ce": 0.004877721890807152, + "loss_iou": 0.5078125, + "loss_num": 0.0279541015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 277047356, + "step": 2193 + }, + { + "epoch": 0.56281664849612, + "grad_norm": 53.68967819213867, + "learning_rate": 5e-06, + "loss": 1.1958, + "num_input_tokens_seen": 277174752, + "step": 2194 + }, + { + "epoch": 0.56281664849612, + "loss": 1.1968333721160889, + "loss_ce": 0.0015208676923066378, + "loss_iou": 0.5390625, + "loss_num": 0.0234375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 277174752, + "step": 2194 + }, + { + "epoch": 0.5630731738600654, + "grad_norm": 42.80198669433594, + "learning_rate": 5e-06, + "loss": 0.9375, + "num_input_tokens_seen": 277300808, + "step": 2195 + }, + { + "epoch": 0.5630731738600654, + "loss": 1.041845440864563, + "loss_ce": 0.0015621936181560159, + "loss_iou": 0.48046875, + "loss_num": 0.01556396484375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 277300808, + "step": 2195 + }, + { + "epoch": 0.5633296992240108, + "grad_norm": 52.26081466674805, + "learning_rate": 5e-06, + "loss": 1.0488, + "num_input_tokens_seen": 277427880, + "step": 2196 + }, + { + "epoch": 0.5633296992240108, + "loss": 1.2035095691680908, + "loss_ce": 0.0018494323594495654, + "loss_iou": 0.5390625, + "loss_num": 0.024169921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 277427880, + "step": 2196 + }, + { + "epoch": 0.5635862245879562, + "grad_norm": 56.040767669677734, + "learning_rate": 5e-06, + "loss": 0.9834, + "num_input_tokens_seen": 277553108, + "step": 2197 + }, + { + "epoch": 0.5635862245879562, + "loss": 0.993122935295105, + "loss_ce": 0.002156094880774617, + "loss_iou": 0.458984375, + "loss_num": 0.014892578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 277553108, + "step": 2197 + }, + { + "epoch": 0.5638427499519015, + "grad_norm": 55.097129821777344, + "learning_rate": 5e-06, + "loss": 0.9692, + "num_input_tokens_seen": 277679808, + "step": 2198 + }, + { + "epoch": 0.5638427499519015, + "loss": 0.8480400443077087, + "loss_ce": 0.0006279013468883932, + "loss_iou": 0.396484375, + "loss_num": 0.01104736328125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 277679808, + "step": 2198 + }, + { + "epoch": 0.5640992753158468, + "grad_norm": 112.22230529785156, + "learning_rate": 5e-06, + "loss": 1.0838, + "num_input_tokens_seen": 277805284, + "step": 2199 + }, + { + "epoch": 0.5640992753158468, + "loss": 0.8988468647003174, + "loss_ce": 0.0013859360478818417, + "loss_iou": 0.42578125, + "loss_num": 0.00897216796875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 277805284, + "step": 2199 + }, + { + "epoch": 0.5643558006797922, + "grad_norm": 45.435150146484375, + "learning_rate": 5e-06, + "loss": 1.0408, + "num_input_tokens_seen": 277930236, + "step": 2200 + }, + { + "epoch": 0.5643558006797922, + "loss": 1.0463415384292603, + "loss_ce": 0.0009313884656876326, + "loss_iou": 0.474609375, + "loss_num": 0.0189208984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 277930236, + "step": 2200 + }, + { + "epoch": 0.5646123260437376, + "grad_norm": 34.56241226196289, + "learning_rate": 5e-06, + "loss": 0.9913, + "num_input_tokens_seen": 278056956, + "step": 2201 + }, + { + "epoch": 0.5646123260437376, + "loss": 0.9187591075897217, + "loss_ce": 0.0007903319783508778, + "loss_iou": 0.4296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 278056956, + "step": 2201 + }, + { + "epoch": 0.564868851407683, + "grad_norm": 66.69220733642578, + "learning_rate": 5e-06, + "loss": 1.186, + "num_input_tokens_seen": 278183860, + "step": 2202 + }, + { + "epoch": 0.564868851407683, + "loss": 0.9285748600959778, + "loss_ce": 0.0003521769540384412, + "loss_iou": 0.44140625, + "loss_num": 0.00946044921875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 278183860, + "step": 2202 + }, + { + "epoch": 0.5651253767716283, + "grad_norm": 49.961769104003906, + "learning_rate": 5e-06, + "loss": 1.0482, + "num_input_tokens_seen": 278309520, + "step": 2203 + }, + { + "epoch": 0.5651253767716283, + "loss": 1.0594885349273682, + "loss_ce": 0.0008947264868766069, + "loss_iou": 0.49609375, + "loss_num": 0.0133056640625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 278309520, + "step": 2203 + }, + { + "epoch": 0.5653819021355736, + "grad_norm": 45.11125564575195, + "learning_rate": 5e-06, + "loss": 0.9627, + "num_input_tokens_seen": 278436368, + "step": 2204 + }, + { + "epoch": 0.5653819021355736, + "loss": 1.0071486234664917, + "loss_ce": 0.0020216715056449175, + "loss_iou": 0.453125, + "loss_num": 0.019775390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 278436368, + "step": 2204 + }, + { + "epoch": 0.565638427499519, + "grad_norm": 36.24090576171875, + "learning_rate": 5e-06, + "loss": 1.0089, + "num_input_tokens_seen": 278561216, + "step": 2205 + }, + { + "epoch": 0.565638427499519, + "loss": 1.1077934503555298, + "loss_ce": 0.0042778197675943375, + "loss_iou": 0.498046875, + "loss_num": 0.0213623046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 278561216, + "step": 2205 + }, + { + "epoch": 0.5658949528634644, + "grad_norm": 54.82352828979492, + "learning_rate": 5e-06, + "loss": 0.9473, + "num_input_tokens_seen": 278688268, + "step": 2206 + }, + { + "epoch": 0.5658949528634644, + "loss": 0.8368304967880249, + "loss_ce": 0.0013813143596053123, + "loss_iou": 0.3828125, + "loss_num": 0.01385498046875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 278688268, + "step": 2206 + }, + { + "epoch": 0.5661514782274097, + "grad_norm": 56.65019989013672, + "learning_rate": 5e-06, + "loss": 1.008, + "num_input_tokens_seen": 278813836, + "step": 2207 + }, + { + "epoch": 0.5661514782274097, + "loss": 0.9092596769332886, + "loss_ce": 0.0015448674093931913, + "loss_iou": 0.4296875, + "loss_num": 0.009765625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 278813836, + "step": 2207 + }, + { + "epoch": 0.5664080035913551, + "grad_norm": 62.40962600708008, + "learning_rate": 5e-06, + "loss": 1.0768, + "num_input_tokens_seen": 278940024, + "step": 2208 + }, + { + "epoch": 0.5664080035913551, + "loss": 0.9135690927505493, + "loss_ce": 0.0024362581316381693, + "loss_iou": 0.41796875, + "loss_num": 0.01495361328125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 278940024, + "step": 2208 + }, + { + "epoch": 0.5666645289553005, + "grad_norm": 47.486122131347656, + "learning_rate": 5e-06, + "loss": 0.9105, + "num_input_tokens_seen": 279066336, + "step": 2209 + }, + { + "epoch": 0.5666645289553005, + "loss": 0.8077210187911987, + "loss_ce": 0.00010379517334513366, + "loss_iou": 0.384765625, + "loss_num": 0.0074462890625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 279066336, + "step": 2209 + }, + { + "epoch": 0.5669210543192458, + "grad_norm": 62.20322799682617, + "learning_rate": 5e-06, + "loss": 1.0743, + "num_input_tokens_seen": 279192956, + "step": 2210 + }, + { + "epoch": 0.5669210543192458, + "loss": 0.9369065165519714, + "loss_ce": 0.00038305958150886, + "loss_iou": 0.44140625, + "loss_num": 0.010986328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 279192956, + "step": 2210 + }, + { + "epoch": 0.5671775796831912, + "grad_norm": 59.297035217285156, + "learning_rate": 5e-06, + "loss": 1.0973, + "num_input_tokens_seen": 279318492, + "step": 2211 + }, + { + "epoch": 0.5671775796831912, + "loss": 1.1324870586395264, + "loss_ce": 0.0011394446482881904, + "loss_iou": 0.51171875, + "loss_num": 0.02099609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 279318492, + "step": 2211 + }, + { + "epoch": 0.5674341050471365, + "grad_norm": 26.751834869384766, + "learning_rate": 5e-06, + "loss": 0.9489, + "num_input_tokens_seen": 279444120, + "step": 2212 + }, + { + "epoch": 0.5674341050471365, + "loss": 1.0214115381240845, + "loss_ce": 0.0013919631019234657, + "loss_iou": 0.466796875, + "loss_num": 0.017578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 279444120, + "step": 2212 + }, + { + "epoch": 0.5676906304110819, + "grad_norm": 29.238269805908203, + "learning_rate": 5e-06, + "loss": 1.026, + "num_input_tokens_seen": 279570148, + "step": 2213 + }, + { + "epoch": 0.5676906304110819, + "loss": 1.0340806245803833, + "loss_ce": 0.0013657421804964542, + "loss_iou": 0.46875, + "loss_num": 0.01904296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 279570148, + "step": 2213 + }, + { + "epoch": 0.5679471557750273, + "grad_norm": 42.82862854003906, + "learning_rate": 5e-06, + "loss": 0.9109, + "num_input_tokens_seen": 279696524, + "step": 2214 + }, + { + "epoch": 0.5679471557750273, + "loss": 0.7711024880409241, + "loss_ce": 0.00010639210813678801, + "loss_iou": 0.369140625, + "loss_num": 0.006622314453125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 279696524, + "step": 2214 + }, + { + "epoch": 0.5682036811389726, + "grad_norm": 50.94182205200195, + "learning_rate": 5e-06, + "loss": 0.964, + "num_input_tokens_seen": 279823304, + "step": 2215 + }, + { + "epoch": 0.5682036811389726, + "loss": 1.0122041702270508, + "loss_ce": 0.0014618970453739166, + "loss_iou": 0.470703125, + "loss_num": 0.01385498046875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 279823304, + "step": 2215 + }, + { + "epoch": 0.568460206502918, + "grad_norm": 69.28797912597656, + "learning_rate": 5e-06, + "loss": 1.0461, + "num_input_tokens_seen": 279949988, + "step": 2216 + }, + { + "epoch": 0.568460206502918, + "loss": 1.3474441766738892, + "loss_ce": 0.004182462580502033, + "loss_iou": 0.6171875, + "loss_num": 0.0225830078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 279949988, + "step": 2216 + }, + { + "epoch": 0.5687167318668633, + "grad_norm": 38.665924072265625, + "learning_rate": 5e-06, + "loss": 1.257, + "num_input_tokens_seen": 280075436, + "step": 2217 + }, + { + "epoch": 0.5687167318668633, + "loss": 1.0412718057632446, + "loss_ce": 0.00025615625781938434, + "loss_iou": 0.48046875, + "loss_num": 0.0157470703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 280075436, + "step": 2217 + }, + { + "epoch": 0.5689732572308087, + "grad_norm": 29.59270477294922, + "learning_rate": 5e-06, + "loss": 0.9421, + "num_input_tokens_seen": 280201572, + "step": 2218 + }, + { + "epoch": 0.5689732572308087, + "loss": 0.9615882039070129, + "loss_ce": 0.0016272829379886389, + "loss_iou": 0.44140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 280201572, + "step": 2218 + }, + { + "epoch": 0.5692297825947541, + "grad_norm": 44.76958465576172, + "learning_rate": 5e-06, + "loss": 0.85, + "num_input_tokens_seen": 280328408, + "step": 2219 + }, + { + "epoch": 0.5692297825947541, + "loss": 0.8659550547599792, + "loss_ce": 0.0007207130547612906, + "loss_iou": 0.41015625, + "loss_num": 0.00933837890625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 280328408, + "step": 2219 + }, + { + "epoch": 0.5694863079586994, + "grad_norm": 39.538536071777344, + "learning_rate": 5e-06, + "loss": 0.9607, + "num_input_tokens_seen": 280453620, + "step": 2220 + }, + { + "epoch": 0.5694863079586994, + "loss": 0.8607710599899292, + "loss_ce": 0.0004194822977297008, + "loss_iou": 0.408203125, + "loss_num": 0.008544921875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 280453620, + "step": 2220 + }, + { + "epoch": 0.5697428333226447, + "grad_norm": 57.23136520385742, + "learning_rate": 5e-06, + "loss": 1.0501, + "num_input_tokens_seen": 280580500, + "step": 2221 + }, + { + "epoch": 0.5697428333226447, + "loss": 0.8529943227767944, + "loss_ce": 0.003384919371455908, + "loss_iou": 0.404296875, + "loss_num": 0.008056640625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 280580500, + "step": 2221 + }, + { + "epoch": 0.5699993586865901, + "grad_norm": 60.38134002685547, + "learning_rate": 5e-06, + "loss": 1.0059, + "num_input_tokens_seen": 280707212, + "step": 2222 + }, + { + "epoch": 0.5699993586865901, + "loss": 1.0389050245285034, + "loss_ce": 0.0008190611843019724, + "loss_iou": 0.4765625, + "loss_num": 0.0172119140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 280707212, + "step": 2222 + }, + { + "epoch": 0.5702558840505355, + "grad_norm": 157.96572875976562, + "learning_rate": 5e-06, + "loss": 0.9434, + "num_input_tokens_seen": 280831068, + "step": 2223 + }, + { + "epoch": 0.5702558840505355, + "loss": 1.156415343284607, + "loss_ce": 0.0026067497674375772, + "loss_iou": 0.51171875, + "loss_num": 0.025634765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 280831068, + "step": 2223 + }, + { + "epoch": 0.5705124094144809, + "grad_norm": 55.338218688964844, + "learning_rate": 5e-06, + "loss": 0.9914, + "num_input_tokens_seen": 280958168, + "step": 2224 + }, + { + "epoch": 0.5705124094144809, + "loss": 1.0103638172149658, + "loss_ce": 0.0020629605278372765, + "loss_iou": 0.470703125, + "loss_num": 0.013671875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 280958168, + "step": 2224 + }, + { + "epoch": 0.5707689347784262, + "grad_norm": 60.0689811706543, + "learning_rate": 5e-06, + "loss": 1.0836, + "num_input_tokens_seen": 281084768, + "step": 2225 + }, + { + "epoch": 0.5707689347784262, + "loss": 1.301161766052246, + "loss_ce": 0.002333517652004957, + "loss_iou": 0.5859375, + "loss_num": 0.0252685546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 281084768, + "step": 2225 + }, + { + "epoch": 0.5710254601423715, + "grad_norm": 52.500450134277344, + "learning_rate": 5e-06, + "loss": 1.0313, + "num_input_tokens_seen": 281212384, + "step": 2226 + }, + { + "epoch": 0.5710254601423715, + "loss": 0.9314784407615662, + "loss_ce": 0.0013026782544329762, + "loss_iou": 0.427734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 281212384, + "step": 2226 + }, + { + "epoch": 0.5712819855063169, + "grad_norm": 48.1938362121582, + "learning_rate": 5e-06, + "loss": 0.9688, + "num_input_tokens_seen": 281338324, + "step": 2227 + }, + { + "epoch": 0.5712819855063169, + "loss": 0.9709362983703613, + "loss_ce": 0.0021862906869500875, + "loss_iou": 0.447265625, + "loss_num": 0.0146484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 281338324, + "step": 2227 + }, + { + "epoch": 0.5715385108702623, + "grad_norm": 53.36368179321289, + "learning_rate": 5e-06, + "loss": 1.0946, + "num_input_tokens_seen": 281465824, + "step": 2228 + }, + { + "epoch": 0.5715385108702623, + "loss": 1.4214125871658325, + "loss_ce": 0.0019789792131632566, + "loss_iou": 0.62890625, + "loss_num": 0.0322265625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 281465824, + "step": 2228 + }, + { + "epoch": 0.5717950362342077, + "grad_norm": 39.174678802490234, + "learning_rate": 5e-06, + "loss": 1.1851, + "num_input_tokens_seen": 281591284, + "step": 2229 + }, + { + "epoch": 0.5717950362342077, + "loss": 1.4434847831726074, + "loss_ce": 0.0020786018576472998, + "loss_iou": 0.6171875, + "loss_num": 0.041259765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 281591284, + "step": 2229 + }, + { + "epoch": 0.5720515615981531, + "grad_norm": 52.28636169433594, + "learning_rate": 5e-06, + "loss": 0.9452, + "num_input_tokens_seen": 281717636, + "step": 2230 + }, + { + "epoch": 0.5720515615981531, + "loss": 0.8811432719230652, + "loss_ce": 0.0027253320440649986, + "loss_iou": 0.41796875, + "loss_num": 0.00885009765625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 281717636, + "step": 2230 + }, + { + "epoch": 0.5723080869620983, + "grad_norm": 198.64175415039062, + "learning_rate": 5e-06, + "loss": 1.0508, + "num_input_tokens_seen": 281843424, + "step": 2231 + }, + { + "epoch": 0.5723080869620983, + "loss": 0.9850019216537476, + "loss_ce": 0.0011151679791510105, + "loss_iou": 0.44921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 281843424, + "step": 2231 + }, + { + "epoch": 0.5725646123260437, + "grad_norm": 30.93668556213379, + "learning_rate": 5e-06, + "loss": 1.0448, + "num_input_tokens_seen": 281969772, + "step": 2232 + }, + { + "epoch": 0.5725646123260437, + "loss": 0.9947054386138916, + "loss_ce": 0.0005648602964356542, + "loss_iou": 0.4609375, + "loss_num": 0.0147705078125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 281969772, + "step": 2232 + }, + { + "epoch": 0.5728211376899891, + "grad_norm": 60.23025894165039, + "learning_rate": 5e-06, + "loss": 0.9904, + "num_input_tokens_seen": 282096152, + "step": 2233 + }, + { + "epoch": 0.5728211376899891, + "loss": 1.0110363960266113, + "loss_ce": 0.0051771411672234535, + "loss_iou": 0.46875, + "loss_num": 0.0137939453125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 282096152, + "step": 2233 + }, + { + "epoch": 0.5730776630539345, + "grad_norm": 49.198455810546875, + "learning_rate": 5e-06, + "loss": 0.996, + "num_input_tokens_seen": 282222008, + "step": 2234 + }, + { + "epoch": 0.5730776630539345, + "loss": 0.9654691219329834, + "loss_ce": 0.0011136349057778716, + "loss_iou": 0.443359375, + "loss_num": 0.015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 282222008, + "step": 2234 + }, + { + "epoch": 0.5733341884178799, + "grad_norm": 51.311988830566406, + "learning_rate": 5e-06, + "loss": 0.9916, + "num_input_tokens_seen": 282348532, + "step": 2235 + }, + { + "epoch": 0.5733341884178799, + "loss": 0.986962080001831, + "loss_ce": 0.00014572578947991133, + "loss_iou": 0.4609375, + "loss_num": 0.01251220703125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 282348532, + "step": 2235 + }, + { + "epoch": 0.5735907137818251, + "grad_norm": 54.95654296875, + "learning_rate": 5e-06, + "loss": 1.0526, + "num_input_tokens_seen": 282474468, + "step": 2236 + }, + { + "epoch": 0.5735907137818251, + "loss": 0.9696420431137085, + "loss_ce": 0.0008920601685531437, + "loss_iou": 0.4453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 282474468, + "step": 2236 + }, + { + "epoch": 0.5738472391457705, + "grad_norm": 43.67275619506836, + "learning_rate": 5e-06, + "loss": 1.2859, + "num_input_tokens_seen": 282600620, + "step": 2237 + }, + { + "epoch": 0.5738472391457705, + "loss": 1.2557644844055176, + "loss_ce": 0.0008817450725473464, + "loss_iou": 0.5625, + "loss_num": 0.025146484375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 282600620, + "step": 2237 + }, + { + "epoch": 0.5741037645097159, + "grad_norm": 73.80721282958984, + "learning_rate": 5e-06, + "loss": 0.96, + "num_input_tokens_seen": 282726808, + "step": 2238 + }, + { + "epoch": 0.5741037645097159, + "loss": 1.1512300968170166, + "loss_ce": 0.006210581865161657, + "loss_iou": 0.51953125, + "loss_num": 0.0205078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 282726808, + "step": 2238 + }, + { + "epoch": 0.5743602898736613, + "grad_norm": 46.245601654052734, + "learning_rate": 5e-06, + "loss": 1.1324, + "num_input_tokens_seen": 282853128, + "step": 2239 + }, + { + "epoch": 0.5743602898736613, + "loss": 1.0906245708465576, + "loss_ce": 0.00029261180316098034, + "loss_iou": 0.51171875, + "loss_num": 0.0137939453125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 282853128, + "step": 2239 + }, + { + "epoch": 0.5746168152376067, + "grad_norm": 41.02141189575195, + "learning_rate": 5e-06, + "loss": 0.9696, + "num_input_tokens_seen": 282978848, + "step": 2240 + }, + { + "epoch": 0.5746168152376067, + "loss": 1.0845317840576172, + "loss_ce": 0.0025004895869642496, + "loss_iou": 0.498046875, + "loss_num": 0.0169677734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 282978848, + "step": 2240 + }, + { + "epoch": 0.5748733406015519, + "grad_norm": 54.90932846069336, + "learning_rate": 5e-06, + "loss": 0.988, + "num_input_tokens_seen": 283105088, + "step": 2241 + }, + { + "epoch": 0.5748733406015519, + "loss": 0.9596421718597412, + "loss_ce": 0.0030992270912975073, + "loss_iou": 0.44921875, + "loss_num": 0.0115966796875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 283105088, + "step": 2241 + }, + { + "epoch": 0.5751298659654973, + "grad_norm": 41.68686294555664, + "learning_rate": 5e-06, + "loss": 0.9843, + "num_input_tokens_seen": 283231168, + "step": 2242 + }, + { + "epoch": 0.5751298659654973, + "loss": 1.0487587451934814, + "loss_ce": 0.0009071234962902963, + "loss_iou": 0.48046875, + "loss_num": 0.0177001953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 283231168, + "step": 2242 + }, + { + "epoch": 0.5753863913294427, + "grad_norm": 53.76775360107422, + "learning_rate": 5e-06, + "loss": 1.0461, + "num_input_tokens_seen": 283356180, + "step": 2243 + }, + { + "epoch": 0.5753863913294427, + "loss": 0.8263636827468872, + "loss_ce": 0.0006801047711633146, + "loss_iou": 0.373046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 283356180, + "step": 2243 + }, + { + "epoch": 0.5756429166933881, + "grad_norm": 54.90708541870117, + "learning_rate": 5e-06, + "loss": 1.1221, + "num_input_tokens_seen": 283482268, + "step": 2244 + }, + { + "epoch": 0.5756429166933881, + "loss": 1.1243772506713867, + "loss_ce": 0.0027952042873948812, + "loss_iou": 0.51171875, + "loss_num": 0.0191650390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 283482268, + "step": 2244 + }, + { + "epoch": 0.5758994420573335, + "grad_norm": 83.53764343261719, + "learning_rate": 5e-06, + "loss": 1.0215, + "num_input_tokens_seen": 283608764, + "step": 2245 + }, + { + "epoch": 0.5758994420573335, + "loss": 1.0757503509521484, + "loss_ce": 0.0005550433415919542, + "loss_iou": 0.494140625, + "loss_num": 0.0169677734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 283608764, + "step": 2245 + }, + { + "epoch": 0.5761559674212787, + "grad_norm": 54.44892120361328, + "learning_rate": 5e-06, + "loss": 1.0271, + "num_input_tokens_seen": 283734540, + "step": 2246 + }, + { + "epoch": 0.5761559674212787, + "loss": 0.8860817551612854, + "loss_ce": 0.0003395703388378024, + "loss_iou": 0.41796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 283734540, + "step": 2246 + }, + { + "epoch": 0.5764124927852241, + "grad_norm": 40.34468078613281, + "learning_rate": 5e-06, + "loss": 1.1341, + "num_input_tokens_seen": 283858972, + "step": 2247 + }, + { + "epoch": 0.5764124927852241, + "loss": 1.0909762382507324, + "loss_ce": 0.0006441898876801133, + "loss_iou": 0.490234375, + "loss_num": 0.022216796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 283858972, + "step": 2247 + }, + { + "epoch": 0.5766690181491695, + "grad_norm": 34.772674560546875, + "learning_rate": 5e-06, + "loss": 0.9643, + "num_input_tokens_seen": 283985192, + "step": 2248 + }, + { + "epoch": 0.5766690181491695, + "loss": 0.9901896715164185, + "loss_ce": 0.0009317906806245446, + "loss_iou": 0.4453125, + "loss_num": 0.0194091796875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 283985192, + "step": 2248 + }, + { + "epoch": 0.5769255435131149, + "grad_norm": 43.80963134765625, + "learning_rate": 5e-06, + "loss": 0.9344, + "num_input_tokens_seen": 284111712, + "step": 2249 + }, + { + "epoch": 0.5769255435131149, + "loss": 0.9860141277313232, + "loss_ce": 0.001639105612412095, + "loss_iou": 0.45703125, + "loss_num": 0.0135498046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 284111712, + "step": 2249 + }, + { + "epoch": 0.5771820688770603, + "grad_norm": 51.15381622314453, + "learning_rate": 5e-06, + "loss": 0.9634, + "num_input_tokens_seen": 284238016, + "step": 2250 + }, + { + "epoch": 0.5771820688770603, + "eval_icons_CIoU": 0.19793132692575455, + "eval_icons_GIoU": 0.1591569408774376, + "eval_icons_IoU": 0.39132004976272583, + "eval_icons_MAE_all": 0.03227099031209946, + "eval_icons_MAE_h": 0.051868053153157234, + "eval_icons_MAE_w": 0.0582665391266346, + "eval_icons_MAE_x_boxes": 0.05914544127881527, + "eval_icons_MAE_y_boxes": 0.04679535888135433, + "eval_icons_NUM_probability": 0.9997171461582184, + "eval_icons_inside_bbox": 0.609375, + "eval_icons_loss": 1.7776092290878296, + "eval_icons_loss_ce": 0.0003541261175996624, + "eval_icons_loss_iou": 0.8216552734375, + "eval_icons_loss_num": 0.03556251525878906, + "eval_icons_loss_xval": 1.81982421875, + "eval_icons_runtime": 46.0679, + "eval_icons_samples_per_second": 1.085, + "eval_icons_steps_per_second": 0.043, + "num_input_tokens_seen": 284238016, + "step": 2250 + }, + { + "epoch": 0.5771820688770603, + "eval_screenspot_CIoU": 0.1255065736671289, + "eval_screenspot_GIoU": 0.11141415561238925, + "eval_screenspot_IoU": 0.2933393617471059, + "eval_screenspot_MAE_all": 0.07572593539953232, + "eval_screenspot_MAE_h": 0.06904004514217377, + "eval_screenspot_MAE_w": 0.1215948611497879, + "eval_screenspot_MAE_x_boxes": 0.0923725242416064, + "eval_screenspot_MAE_y_boxes": 0.06064350033799807, + "eval_screenspot_NUM_probability": 0.9998934666315714, + "eval_screenspot_inside_bbox": 0.6462500095367432, + "eval_screenspot_loss": 2.2103700637817383, + "eval_screenspot_loss_ce": 0.002832048300964137, + "eval_screenspot_loss_iou": 0.92236328125, + "eval_screenspot_loss_num": 0.0816497802734375, + "eval_screenspot_loss_xval": 2.2545572916666665, + "eval_screenspot_runtime": 81.5106, + "eval_screenspot_samples_per_second": 1.092, + "eval_screenspot_steps_per_second": 0.037, + "num_input_tokens_seen": 284238016, + "step": 2250 + }, + { + "epoch": 0.5771820688770603, + "loss": 2.179178476333618, + "loss_ce": 0.002420613542199135, + "loss_iou": 0.9140625, + "loss_num": 0.068359375, + "loss_xval": 2.171875, + "num_input_tokens_seen": 284238016, + "step": 2250 + }, + { + "epoch": 0.5774385942410056, + "grad_norm": 55.088130950927734, + "learning_rate": 5e-06, + "loss": 0.9725, + "num_input_tokens_seen": 284364828, + "step": 2251 + }, + { + "epoch": 0.5774385942410056, + "loss": 0.9229689836502075, + "loss_ce": 0.002558805514127016, + "loss_iou": 0.427734375, + "loss_num": 0.01324462890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 284364828, + "step": 2251 + }, + { + "epoch": 0.5776951196049509, + "grad_norm": 50.19672393798828, + "learning_rate": 5e-06, + "loss": 1.0152, + "num_input_tokens_seen": 284490932, + "step": 2252 + }, + { + "epoch": 0.5776951196049509, + "loss": 1.229791283607483, + "loss_ce": 0.0017639752477407455, + "loss_iou": 0.5546875, + "loss_num": 0.0244140625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 284490932, + "step": 2252 + }, + { + "epoch": 0.5779516449688963, + "grad_norm": 41.68750762939453, + "learning_rate": 5e-06, + "loss": 1.0447, + "num_input_tokens_seen": 284617652, + "step": 2253 + }, + { + "epoch": 0.5779516449688963, + "loss": 1.0270447731018066, + "loss_ce": 0.0006775574875064194, + "loss_iou": 0.46875, + "loss_num": 0.0179443359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 284617652, + "step": 2253 + }, + { + "epoch": 0.5782081703328417, + "grad_norm": 59.3745002746582, + "learning_rate": 5e-06, + "loss": 0.9212, + "num_input_tokens_seen": 284744744, + "step": 2254 + }, + { + "epoch": 0.5782081703328417, + "loss": 0.7657381296157837, + "loss_ce": 0.0020662054885178804, + "loss_iou": 0.365234375, + "loss_num": 0.006744384765625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 284744744, + "step": 2254 + }, + { + "epoch": 0.578464695696787, + "grad_norm": 67.38247680664062, + "learning_rate": 5e-06, + "loss": 0.9813, + "num_input_tokens_seen": 284869900, + "step": 2255 + }, + { + "epoch": 0.578464695696787, + "loss": 0.9027704000473022, + "loss_ce": 0.0004266508622094989, + "loss_iou": 0.42578125, + "loss_num": 0.0098876953125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 284869900, + "step": 2255 + }, + { + "epoch": 0.5787212210607324, + "grad_norm": 55.868324279785156, + "learning_rate": 5e-06, + "loss": 0.9592, + "num_input_tokens_seen": 284995920, + "step": 2256 + }, + { + "epoch": 0.5787212210607324, + "loss": 0.960580587387085, + "loss_ce": 0.0006196207832545042, + "loss_iou": 0.451171875, + "loss_num": 0.011962890625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 284995920, + "step": 2256 + }, + { + "epoch": 0.5789777464246777, + "grad_norm": 60.839866638183594, + "learning_rate": 5e-06, + "loss": 0.9939, + "num_input_tokens_seen": 285122188, + "step": 2257 + }, + { + "epoch": 0.5789777464246777, + "loss": 1.0641573667526245, + "loss_ce": 0.0016573506873100996, + "loss_iou": 0.5, + "loss_num": 0.0128173828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 285122188, + "step": 2257 + }, + { + "epoch": 0.5792342717886231, + "grad_norm": 46.87041091918945, + "learning_rate": 5e-06, + "loss": 1.0252, + "num_input_tokens_seen": 285248244, + "step": 2258 + }, + { + "epoch": 0.5792342717886231, + "loss": 1.2368899583816528, + "loss_ce": 0.0015384089201688766, + "loss_iou": 0.5703125, + "loss_num": 0.019287109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 285248244, + "step": 2258 + }, + { + "epoch": 0.5794907971525685, + "grad_norm": 36.695682525634766, + "learning_rate": 5e-06, + "loss": 0.986, + "num_input_tokens_seen": 285373412, + "step": 2259 + }, + { + "epoch": 0.5794907971525685, + "loss": 1.1304900646209717, + "loss_ce": 0.0015837789978832006, + "loss_iou": 0.50390625, + "loss_num": 0.0244140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 285373412, + "step": 2259 + }, + { + "epoch": 0.5797473225165138, + "grad_norm": 31.57451057434082, + "learning_rate": 5e-06, + "loss": 1.0088, + "num_input_tokens_seen": 285499644, + "step": 2260 + }, + { + "epoch": 0.5797473225165138, + "loss": 0.8618742823600769, + "loss_ce": 0.0005461572436615825, + "loss_iou": 0.40625, + "loss_num": 0.0093994140625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 285499644, + "step": 2260 + }, + { + "epoch": 0.5800038478804592, + "grad_norm": 52.21601104736328, + "learning_rate": 5e-06, + "loss": 0.8864, + "num_input_tokens_seen": 285624308, + "step": 2261 + }, + { + "epoch": 0.5800038478804592, + "loss": 0.7393556833267212, + "loss_ce": 0.0015626954846084118, + "loss_iou": 0.359375, + "loss_num": 0.004425048828125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 285624308, + "step": 2261 + }, + { + "epoch": 0.5802603732444045, + "grad_norm": 53.84465408325195, + "learning_rate": 5e-06, + "loss": 1.0018, + "num_input_tokens_seen": 285751528, + "step": 2262 + }, + { + "epoch": 0.5802603732444045, + "loss": 1.074723720550537, + "loss_ce": 0.0014815161703154445, + "loss_iou": 0.498046875, + "loss_num": 0.01556396484375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 285751528, + "step": 2262 + }, + { + "epoch": 0.5805168986083499, + "grad_norm": 57.43070983886719, + "learning_rate": 5e-06, + "loss": 0.9119, + "num_input_tokens_seen": 285877412, + "step": 2263 + }, + { + "epoch": 0.5805168986083499, + "loss": 0.8867683410644531, + "loss_ce": 0.00371170649304986, + "loss_iou": 0.41015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 285877412, + "step": 2263 + }, + { + "epoch": 0.5807734239722953, + "grad_norm": 51.67479705810547, + "learning_rate": 5e-06, + "loss": 1.0146, + "num_input_tokens_seen": 286003856, + "step": 2264 + }, + { + "epoch": 0.5807734239722953, + "loss": 1.0852046012878418, + "loss_ce": 0.001220195204950869, + "loss_iou": 0.5, + "loss_num": 0.0169677734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 286003856, + "step": 2264 + }, + { + "epoch": 0.5810299493362406, + "grad_norm": 75.89453887939453, + "learning_rate": 5e-06, + "loss": 1.1129, + "num_input_tokens_seen": 286130396, + "step": 2265 + }, + { + "epoch": 0.5810299493362406, + "loss": 0.9778915643692017, + "loss_ce": 0.00035248787025921047, + "loss_iou": 0.453125, + "loss_num": 0.01409912109375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 286130396, + "step": 2265 + }, + { + "epoch": 0.581286474700186, + "grad_norm": 55.59190368652344, + "learning_rate": 5e-06, + "loss": 1.0374, + "num_input_tokens_seen": 286256392, + "step": 2266 + }, + { + "epoch": 0.581286474700186, + "loss": 1.0047212839126587, + "loss_ce": 0.00032677644048817456, + "loss_iou": 0.47265625, + "loss_num": 0.01171875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 286256392, + "step": 2266 + }, + { + "epoch": 0.5815430000641313, + "grad_norm": 41.733402252197266, + "learning_rate": 5e-06, + "loss": 1.0777, + "num_input_tokens_seen": 286381844, + "step": 2267 + }, + { + "epoch": 0.5815430000641313, + "loss": 1.107222080230713, + "loss_ce": 0.00028843176551163197, + "loss_iou": 0.51171875, + "loss_num": 0.01611328125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 286381844, + "step": 2267 + }, + { + "epoch": 0.5817995254280767, + "grad_norm": 42.87211608886719, + "learning_rate": 5e-06, + "loss": 1.0669, + "num_input_tokens_seen": 286508236, + "step": 2268 + }, + { + "epoch": 0.5817995254280767, + "loss": 1.1914091110229492, + "loss_ce": 0.0019559753127396107, + "loss_iou": 0.5390625, + "loss_num": 0.0216064453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 286508236, + "step": 2268 + }, + { + "epoch": 0.582056050792022, + "grad_norm": 57.303680419921875, + "learning_rate": 5e-06, + "loss": 0.9959, + "num_input_tokens_seen": 286634016, + "step": 2269 + }, + { + "epoch": 0.582056050792022, + "loss": 1.0285944938659668, + "loss_ce": 0.0013728067278862, + "loss_iou": 0.4609375, + "loss_num": 0.021484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 286634016, + "step": 2269 + }, + { + "epoch": 0.5823125761559674, + "grad_norm": 49.61715316772461, + "learning_rate": 5e-06, + "loss": 1.1026, + "num_input_tokens_seen": 286761024, + "step": 2270 + }, + { + "epoch": 0.5823125761559674, + "loss": 1.2380586862564087, + "loss_ce": 0.0012422139989212155, + "loss_iou": 0.5546875, + "loss_num": 0.0244140625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 286761024, + "step": 2270 + }, + { + "epoch": 0.5825691015199128, + "grad_norm": 34.102230072021484, + "learning_rate": 5e-06, + "loss": 0.9323, + "num_input_tokens_seen": 286886632, + "step": 2271 + }, + { + "epoch": 0.5825691015199128, + "loss": 0.9303451776504517, + "loss_ce": 0.001634210697375238, + "loss_iou": 0.42578125, + "loss_num": 0.015380859375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 286886632, + "step": 2271 + }, + { + "epoch": 0.5828256268838582, + "grad_norm": 48.215763092041016, + "learning_rate": 5e-06, + "loss": 1.0042, + "num_input_tokens_seen": 287013924, + "step": 2272 + }, + { + "epoch": 0.5828256268838582, + "loss": 0.9306126832962036, + "loss_ce": 0.000925180152989924, + "loss_iou": 0.447265625, + "loss_num": 0.007476806640625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 287013924, + "step": 2272 + }, + { + "epoch": 0.5830821522478035, + "grad_norm": 72.29576110839844, + "learning_rate": 5e-06, + "loss": 0.9105, + "num_input_tokens_seen": 287140808, + "step": 2273 + }, + { + "epoch": 0.5830821522478035, + "loss": 0.912997305393219, + "loss_ce": 0.0013761724112555385, + "loss_iou": 0.4296875, + "loss_num": 0.0107421875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 287140808, + "step": 2273 + }, + { + "epoch": 0.5833386776117488, + "grad_norm": 46.6407356262207, + "learning_rate": 5e-06, + "loss": 1.0048, + "num_input_tokens_seen": 287265924, + "step": 2274 + }, + { + "epoch": 0.5833386776117488, + "loss": 1.119551181793213, + "loss_ce": 0.0033401604741811752, + "loss_iou": 0.51171875, + "loss_num": 0.0186767578125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 287265924, + "step": 2274 + }, + { + "epoch": 0.5835952029756942, + "grad_norm": 27.71636962890625, + "learning_rate": 5e-06, + "loss": 1.0326, + "num_input_tokens_seen": 287391876, + "step": 2275 + }, + { + "epoch": 0.5835952029756942, + "loss": 1.0950871706008911, + "loss_ce": 0.004266802687197924, + "loss_iou": 0.48046875, + "loss_num": 0.0260009765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 287391876, + "step": 2275 + }, + { + "epoch": 0.5838517283396396, + "grad_norm": 41.43502426147461, + "learning_rate": 5e-06, + "loss": 1.091, + "num_input_tokens_seen": 287518432, + "step": 2276 + }, + { + "epoch": 0.5838517283396396, + "loss": 1.1042592525482178, + "loss_ce": 0.0007435904699377716, + "loss_iou": 0.50390625, + "loss_num": 0.019287109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 287518432, + "step": 2276 + }, + { + "epoch": 0.584108253703585, + "grad_norm": 58.871559143066406, + "learning_rate": 5e-06, + "loss": 1.1052, + "num_input_tokens_seen": 287645216, + "step": 2277 + }, + { + "epoch": 0.584108253703585, + "loss": 1.3482320308685303, + "loss_ce": 0.004482047166675329, + "loss_iou": 0.5625, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 287645216, + "step": 2277 + }, + { + "epoch": 0.5843647790675303, + "grad_norm": 47.11354064941406, + "learning_rate": 5e-06, + "loss": 0.9975, + "num_input_tokens_seen": 287771684, + "step": 2278 + }, + { + "epoch": 0.5843647790675303, + "loss": 1.0229675769805908, + "loss_ce": 0.001971406629309058, + "loss_iou": 0.478515625, + "loss_num": 0.0128173828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 287771684, + "step": 2278 + }, + { + "epoch": 0.5846213044314756, + "grad_norm": 24.086027145385742, + "learning_rate": 5e-06, + "loss": 0.9724, + "num_input_tokens_seen": 287898116, + "step": 2279 + }, + { + "epoch": 0.5846213044314756, + "loss": 0.945029079914093, + "loss_ce": 0.0006931039388291538, + "loss_iou": 0.4375, + "loss_num": 0.01385498046875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 287898116, + "step": 2279 + }, + { + "epoch": 0.584877829795421, + "grad_norm": 33.48784637451172, + "learning_rate": 5e-06, + "loss": 0.9629, + "num_input_tokens_seen": 288023988, + "step": 2280 + }, + { + "epoch": 0.584877829795421, + "loss": 0.8201531171798706, + "loss_ce": 0.002282051369547844, + "loss_iou": 0.39453125, + "loss_num": 0.006134033203125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 288023988, + "step": 2280 + }, + { + "epoch": 0.5851343551593664, + "grad_norm": 66.6264877319336, + "learning_rate": 5e-06, + "loss": 1.0201, + "num_input_tokens_seen": 288149784, + "step": 2281 + }, + { + "epoch": 0.5851343551593664, + "loss": 1.0106350183486938, + "loss_ce": 0.00038116273935884237, + "loss_iou": 0.466796875, + "loss_num": 0.01531982421875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 288149784, + "step": 2281 + }, + { + "epoch": 0.5853908805233118, + "grad_norm": 54.58407974243164, + "learning_rate": 5e-06, + "loss": 1.0668, + "num_input_tokens_seen": 288276044, + "step": 2282 + }, + { + "epoch": 0.5853908805233118, + "loss": 1.1822755336761475, + "loss_ce": 0.0025880462490022182, + "loss_iou": 0.53125, + "loss_num": 0.023193359375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 288276044, + "step": 2282 + }, + { + "epoch": 0.585647405887257, + "grad_norm": 47.02547073364258, + "learning_rate": 5e-06, + "loss": 1.0616, + "num_input_tokens_seen": 288402268, + "step": 2283 + }, + { + "epoch": 0.585647405887257, + "loss": 1.1305227279663086, + "loss_ce": 0.00015163978969212621, + "loss_iou": 0.51953125, + "loss_num": 0.017822265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 288402268, + "step": 2283 + }, + { + "epoch": 0.5859039312512024, + "grad_norm": 60.92919921875, + "learning_rate": 5e-06, + "loss": 1.0682, + "num_input_tokens_seen": 288528940, + "step": 2284 + }, + { + "epoch": 0.5859039312512024, + "loss": 1.1100897789001465, + "loss_ce": 0.003156077116727829, + "loss_iou": 0.51171875, + "loss_num": 0.0164794921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 288528940, + "step": 2284 + }, + { + "epoch": 0.5861604566151478, + "grad_norm": 59.14535903930664, + "learning_rate": 5e-06, + "loss": 0.979, + "num_input_tokens_seen": 288654936, + "step": 2285 + }, + { + "epoch": 0.5861604566151478, + "loss": 1.1865986585617065, + "loss_ce": 0.002028298331424594, + "loss_iou": 0.5390625, + "loss_num": 0.0211181640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 288654936, + "step": 2285 + }, + { + "epoch": 0.5864169819790932, + "grad_norm": 45.92155456542969, + "learning_rate": 5e-06, + "loss": 0.9416, + "num_input_tokens_seen": 288779860, + "step": 2286 + }, + { + "epoch": 0.5864169819790932, + "loss": 0.9807435274124146, + "loss_ce": 0.0017396315233781934, + "loss_iou": 0.45703125, + "loss_num": 0.0126953125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 288779860, + "step": 2286 + }, + { + "epoch": 0.5866735073430386, + "grad_norm": 37.88084411621094, + "learning_rate": 5e-06, + "loss": 0.9643, + "num_input_tokens_seen": 288905076, + "step": 2287 + }, + { + "epoch": 0.5866735073430386, + "loss": 1.0260951519012451, + "loss_ce": 0.0021693662274628878, + "loss_iou": 0.45703125, + "loss_num": 0.021728515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 288905076, + "step": 2287 + }, + { + "epoch": 0.5869300327069839, + "grad_norm": 50.115901947021484, + "learning_rate": 5e-06, + "loss": 0.946, + "num_input_tokens_seen": 289031844, + "step": 2288 + }, + { + "epoch": 0.5869300327069839, + "loss": 0.8172237873077393, + "loss_ce": 0.0003292668843641877, + "loss_iou": 0.3828125, + "loss_num": 0.0098876953125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 289031844, + "step": 2288 + }, + { + "epoch": 0.5871865580709292, + "grad_norm": 60.2529411315918, + "learning_rate": 5e-06, + "loss": 1.1307, + "num_input_tokens_seen": 289157492, + "step": 2289 + }, + { + "epoch": 0.5871865580709292, + "loss": 1.2013952732086182, + "loss_ce": 0.0011999878333881497, + "loss_iou": 0.5390625, + "loss_num": 0.025146484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 289157492, + "step": 2289 + }, + { + "epoch": 0.5874430834348746, + "grad_norm": 48.58702087402344, + "learning_rate": 5e-06, + "loss": 1.0665, + "num_input_tokens_seen": 289284084, + "step": 2290 + }, + { + "epoch": 0.5874430834348746, + "loss": 1.1696836948394775, + "loss_ce": 0.002691560424864292, + "loss_iou": 0.5390625, + "loss_num": 0.017578125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 289284084, + "step": 2290 + }, + { + "epoch": 0.58769960879882, + "grad_norm": 50.5435676574707, + "learning_rate": 5e-06, + "loss": 1.1193, + "num_input_tokens_seen": 289410432, + "step": 2291 + }, + { + "epoch": 0.58769960879882, + "loss": 1.1102272272109985, + "loss_ce": 0.0013404700439423323, + "loss_iou": 0.49609375, + "loss_num": 0.0235595703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 289410432, + "step": 2291 + }, + { + "epoch": 0.5879561341627654, + "grad_norm": 45.19846725463867, + "learning_rate": 5e-06, + "loss": 0.953, + "num_input_tokens_seen": 289536236, + "step": 2292 + }, + { + "epoch": 0.5879561341627654, + "loss": 1.0869293212890625, + "loss_ce": 0.0009917940478771925, + "loss_iou": 0.494140625, + "loss_num": 0.01953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 289536236, + "step": 2292 + }, + { + "epoch": 0.5882126595267108, + "grad_norm": 53.91408157348633, + "learning_rate": 5e-06, + "loss": 0.9257, + "num_input_tokens_seen": 289663172, + "step": 2293 + }, + { + "epoch": 0.5882126595267108, + "loss": 1.073516845703125, + "loss_ce": 0.0007629689062014222, + "loss_iou": 0.478515625, + "loss_num": 0.02294921875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 289663172, + "step": 2293 + }, + { + "epoch": 0.588469184890656, + "grad_norm": 45.148101806640625, + "learning_rate": 5e-06, + "loss": 1.0471, + "num_input_tokens_seen": 289789744, + "step": 2294 + }, + { + "epoch": 0.588469184890656, + "loss": 0.9837645292282104, + "loss_ce": 0.00036609184462577105, + "loss_iou": 0.45703125, + "loss_num": 0.0133056640625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 289789744, + "step": 2294 + }, + { + "epoch": 0.5887257102546014, + "grad_norm": 46.5736083984375, + "learning_rate": 5e-06, + "loss": 1.0602, + "num_input_tokens_seen": 289915700, + "step": 2295 + }, + { + "epoch": 0.5887257102546014, + "loss": 1.093023657798767, + "loss_ce": 0.0002502023708075285, + "loss_iou": 0.484375, + "loss_num": 0.0244140625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 289915700, + "step": 2295 + }, + { + "epoch": 0.5889822356185468, + "grad_norm": 45.162940979003906, + "learning_rate": 5e-06, + "loss": 1.0896, + "num_input_tokens_seen": 290042188, + "step": 2296 + }, + { + "epoch": 0.5889822356185468, + "loss": 1.2067360877990723, + "loss_ce": 0.001657957211136818, + "loss_iou": 0.546875, + "loss_num": 0.022705078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 290042188, + "step": 2296 + }, + { + "epoch": 0.5892387609824922, + "grad_norm": 41.95317077636719, + "learning_rate": 5e-06, + "loss": 0.9579, + "num_input_tokens_seen": 290168024, + "step": 2297 + }, + { + "epoch": 0.5892387609824922, + "loss": 0.8861187100410461, + "loss_ce": 0.0008647897047922015, + "loss_iou": 0.416015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 290168024, + "step": 2297 + }, + { + "epoch": 0.5894952863464376, + "grad_norm": 55.04458999633789, + "learning_rate": 5e-06, + "loss": 0.9231, + "num_input_tokens_seen": 290294076, + "step": 2298 + }, + { + "epoch": 0.5894952863464376, + "loss": 1.0454572439193726, + "loss_ce": 0.001511923735961318, + "loss_iou": 0.46484375, + "loss_num": 0.0234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 290294076, + "step": 2298 + }, + { + "epoch": 0.5897518117103828, + "grad_norm": 58.13313293457031, + "learning_rate": 5e-06, + "loss": 0.9906, + "num_input_tokens_seen": 290419144, + "step": 2299 + }, + { + "epoch": 0.5897518117103828, + "loss": 0.919009804725647, + "loss_ce": 0.0010410206159576774, + "loss_iou": 0.4296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 290419144, + "step": 2299 + }, + { + "epoch": 0.5900083370743282, + "grad_norm": 48.28561019897461, + "learning_rate": 5e-06, + "loss": 1.1189, + "num_input_tokens_seen": 290546412, + "step": 2300 + }, + { + "epoch": 0.5900083370743282, + "loss": 1.11970853805542, + "loss_ce": 0.0015445370227098465, + "loss_iou": 0.5, + "loss_num": 0.02392578125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 290546412, + "step": 2300 + }, + { + "epoch": 0.5902648624382736, + "grad_norm": 54.705238342285156, + "learning_rate": 5e-06, + "loss": 0.9976, + "num_input_tokens_seen": 290672412, + "step": 2301 + }, + { + "epoch": 0.5902648624382736, + "loss": 1.0895664691925049, + "loss_ce": 0.0006992334383539855, + "loss_iou": 0.5, + "loss_num": 0.0177001953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 290672412, + "step": 2301 + }, + { + "epoch": 0.590521387802219, + "grad_norm": 51.05278778076172, + "learning_rate": 5e-06, + "loss": 1.0737, + "num_input_tokens_seen": 290797796, + "step": 2302 + }, + { + "epoch": 0.590521387802219, + "loss": 1.2142598628997803, + "loss_ce": 0.001369209261611104, + "loss_iou": 0.5546875, + "loss_num": 0.0213623046875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 290797796, + "step": 2302 + }, + { + "epoch": 0.5907779131661643, + "grad_norm": 34.975250244140625, + "learning_rate": 5e-06, + "loss": 0.9657, + "num_input_tokens_seen": 290923528, + "step": 2303 + }, + { + "epoch": 0.5907779131661643, + "loss": 1.116004228591919, + "loss_ce": 0.00028154952451586723, + "loss_iou": 0.515625, + "loss_num": 0.016845703125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 290923528, + "step": 2303 + }, + { + "epoch": 0.5910344385301096, + "grad_norm": 32.274192810058594, + "learning_rate": 5e-06, + "loss": 0.9616, + "num_input_tokens_seen": 291049368, + "step": 2304 + }, + { + "epoch": 0.5910344385301096, + "loss": 1.1439359188079834, + "loss_ce": 0.0003813515941146761, + "loss_iou": 0.51953125, + "loss_num": 0.0211181640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 291049368, + "step": 2304 + }, + { + "epoch": 0.591290963894055, + "grad_norm": 60.639617919921875, + "learning_rate": 5e-06, + "loss": 1.0814, + "num_input_tokens_seen": 291176872, + "step": 2305 + }, + { + "epoch": 0.591290963894055, + "loss": 0.9734630584716797, + "loss_ce": 0.00373642286285758, + "loss_iou": 0.451171875, + "loss_num": 0.013427734375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 291176872, + "step": 2305 + }, + { + "epoch": 0.5915474892580004, + "grad_norm": 42.64265441894531, + "learning_rate": 5e-06, + "loss": 1.0741, + "num_input_tokens_seen": 291302524, + "step": 2306 + }, + { + "epoch": 0.5915474892580004, + "loss": 1.2454249858856201, + "loss_ce": 0.003725821152329445, + "loss_iou": 0.55859375, + "loss_num": 0.025390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 291302524, + "step": 2306 + }, + { + "epoch": 0.5918040146219458, + "grad_norm": 24.7999324798584, + "learning_rate": 5e-06, + "loss": 0.9294, + "num_input_tokens_seen": 291428160, + "step": 2307 + }, + { + "epoch": 0.5918040146219458, + "loss": 1.1812859773635864, + "loss_ce": 0.0001336273708147928, + "loss_iou": 0.54296875, + "loss_num": 0.0191650390625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 291428160, + "step": 2307 + }, + { + "epoch": 0.5920605399858911, + "grad_norm": 24.495330810546875, + "learning_rate": 5e-06, + "loss": 0.8388, + "num_input_tokens_seen": 291553680, + "step": 2308 + }, + { + "epoch": 0.5920605399858911, + "loss": 0.8544079065322876, + "loss_ce": 0.0008922575507313013, + "loss_iou": 0.400390625, + "loss_num": 0.01025390625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 291553680, + "step": 2308 + }, + { + "epoch": 0.5923170653498364, + "grad_norm": 47.533042907714844, + "learning_rate": 5e-06, + "loss": 0.9455, + "num_input_tokens_seen": 291679700, + "step": 2309 + }, + { + "epoch": 0.5923170653498364, + "loss": 0.8239967823028564, + "loss_ce": 0.00026626078761182725, + "loss_iou": 0.390625, + "loss_num": 0.00836181640625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 291679700, + "step": 2309 + }, + { + "epoch": 0.5925735907137818, + "grad_norm": 58.973445892333984, + "learning_rate": 5e-06, + "loss": 0.9845, + "num_input_tokens_seen": 291806780, + "step": 2310 + }, + { + "epoch": 0.5925735907137818, + "loss": 1.2595105171203613, + "loss_ce": 0.0012097798753529787, + "loss_iou": 0.578125, + "loss_num": 0.0201416015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 291806780, + "step": 2310 + }, + { + "epoch": 0.5928301160777272, + "grad_norm": 89.13544464111328, + "learning_rate": 5e-06, + "loss": 0.885, + "num_input_tokens_seen": 291933300, + "step": 2311 + }, + { + "epoch": 0.5928301160777272, + "loss": 1.0378074645996094, + "loss_ce": 0.0011863324325531721, + "loss_iou": 0.474609375, + "loss_num": 0.017578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 291933300, + "step": 2311 + }, + { + "epoch": 0.5930866414416726, + "grad_norm": 67.94842529296875, + "learning_rate": 5e-06, + "loss": 1.0881, + "num_input_tokens_seen": 292060508, + "step": 2312 + }, + { + "epoch": 0.5930866414416726, + "loss": 0.9627427458763123, + "loss_ce": 0.00034045439679175615, + "loss_iou": 0.443359375, + "loss_num": 0.015380859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 292060508, + "step": 2312 + }, + { + "epoch": 0.5933431668056179, + "grad_norm": 28.837142944335938, + "learning_rate": 5e-06, + "loss": 0.9479, + "num_input_tokens_seen": 292186748, + "step": 2313 + }, + { + "epoch": 0.5933431668056179, + "loss": 0.7363103628158569, + "loss_ce": 0.002667805412784219, + "loss_iou": 0.34765625, + "loss_num": 0.00787353515625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 292186748, + "step": 2313 + }, + { + "epoch": 0.5935996921695633, + "grad_norm": 39.987060546875, + "learning_rate": 5e-06, + "loss": 0.9748, + "num_input_tokens_seen": 292312808, + "step": 2314 + }, + { + "epoch": 0.5935996921695633, + "loss": 0.9550735950469971, + "loss_ce": 0.00048377137864008546, + "loss_iou": 0.4375, + "loss_num": 0.015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 292312808, + "step": 2314 + }, + { + "epoch": 0.5938562175335086, + "grad_norm": 68.28369903564453, + "learning_rate": 5e-06, + "loss": 1.0291, + "num_input_tokens_seen": 292439064, + "step": 2315 + }, + { + "epoch": 0.5938562175335086, + "loss": 1.0445154905319214, + "loss_ce": 0.0005701752961613238, + "loss_iou": 0.486328125, + "loss_num": 0.01409912109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 292439064, + "step": 2315 + }, + { + "epoch": 0.594112742897454, + "grad_norm": 50.857215881347656, + "learning_rate": 5e-06, + "loss": 1.0444, + "num_input_tokens_seen": 292565800, + "step": 2316 + }, + { + "epoch": 0.594112742897454, + "loss": 1.034066915512085, + "loss_ce": 0.006723094265908003, + "loss_iou": 0.48046875, + "loss_num": 0.012939453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 292565800, + "step": 2316 + }, + { + "epoch": 0.5943692682613994, + "grad_norm": 35.74585723876953, + "learning_rate": 5e-06, + "loss": 0.9786, + "num_input_tokens_seen": 292691180, + "step": 2317 + }, + { + "epoch": 0.5943692682613994, + "loss": 1.0223612785339355, + "loss_ce": 0.00038872676668688655, + "loss_iou": 0.48046875, + "loss_num": 0.01177978515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 292691180, + "step": 2317 + }, + { + "epoch": 0.5946257936253447, + "grad_norm": 38.40127944946289, + "learning_rate": 5e-06, + "loss": 1.0309, + "num_input_tokens_seen": 292815748, + "step": 2318 + }, + { + "epoch": 0.5946257936253447, + "loss": 1.0342509746551514, + "loss_ce": 0.0005596159026026726, + "loss_iou": 0.484375, + "loss_num": 0.0130615234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 292815748, + "step": 2318 + }, + { + "epoch": 0.5948823189892901, + "grad_norm": 35.82906723022461, + "learning_rate": 5e-06, + "loss": 1.0031, + "num_input_tokens_seen": 292940916, + "step": 2319 + }, + { + "epoch": 0.5948823189892901, + "loss": 0.7945461869239807, + "loss_ce": 0.0003567672974895686, + "loss_iou": 0.369140625, + "loss_num": 0.0108642578125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 292940916, + "step": 2319 + }, + { + "epoch": 0.5951388443532354, + "grad_norm": 54.18876647949219, + "learning_rate": 5e-06, + "loss": 0.9282, + "num_input_tokens_seen": 293067588, + "step": 2320 + }, + { + "epoch": 0.5951388443532354, + "loss": 0.829771876335144, + "loss_ce": 0.0006703597609885037, + "loss_iou": 0.3984375, + "loss_num": 0.006256103515625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 293067588, + "step": 2320 + }, + { + "epoch": 0.5953953697171808, + "grad_norm": 46.512367248535156, + "learning_rate": 5e-06, + "loss": 1.0253, + "num_input_tokens_seen": 293191504, + "step": 2321 + }, + { + "epoch": 0.5953953697171808, + "loss": 0.9407771825790405, + "loss_ce": 0.0003475116682238877, + "loss_iou": 0.4375, + "loss_num": 0.013427734375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 293191504, + "step": 2321 + }, + { + "epoch": 0.5956518950811261, + "grad_norm": 29.747386932373047, + "learning_rate": 5e-06, + "loss": 0.9981, + "num_input_tokens_seen": 293317856, + "step": 2322 + }, + { + "epoch": 0.5956518950811261, + "loss": 0.8702504634857178, + "loss_ce": 0.00013331117224879563, + "loss_iou": 0.40625, + "loss_num": 0.01177978515625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 293317856, + "step": 2322 + }, + { + "epoch": 0.5959084204450715, + "grad_norm": 43.62669372558594, + "learning_rate": 5e-06, + "loss": 0.95, + "num_input_tokens_seen": 293444056, + "step": 2323 + }, + { + "epoch": 0.5959084204450715, + "loss": 0.9830790162086487, + "loss_ce": 0.0006571381818503141, + "loss_iou": 0.466796875, + "loss_num": 0.0101318359375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 293444056, + "step": 2323 + }, + { + "epoch": 0.5961649458090169, + "grad_norm": 44.719032287597656, + "learning_rate": 5e-06, + "loss": 1.0186, + "num_input_tokens_seen": 293569836, + "step": 2324 + }, + { + "epoch": 0.5961649458090169, + "loss": 1.124413251876831, + "loss_ce": 0.004784312564879656, + "loss_iou": 0.515625, + "loss_num": 0.0174560546875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 293569836, + "step": 2324 + }, + { + "epoch": 0.5964214711729622, + "grad_norm": 68.10160064697266, + "learning_rate": 5e-06, + "loss": 0.895, + "num_input_tokens_seen": 293695616, + "step": 2325 + }, + { + "epoch": 0.5964214711729622, + "loss": 0.9030669927597046, + "loss_ce": 0.0007232313510030508, + "loss_iou": 0.41796875, + "loss_num": 0.01361083984375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 293695616, + "step": 2325 + }, + { + "epoch": 0.5966779965369076, + "grad_norm": 62.12166976928711, + "learning_rate": 5e-06, + "loss": 1.0999, + "num_input_tokens_seen": 293822356, + "step": 2326 + }, + { + "epoch": 0.5966779965369076, + "loss": 0.8887626528739929, + "loss_ce": 0.0005790781578980386, + "loss_iou": 0.41796875, + "loss_num": 0.01080322265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 293822356, + "step": 2326 + }, + { + "epoch": 0.5969345219008529, + "grad_norm": 44.03275680541992, + "learning_rate": 5e-06, + "loss": 0.9549, + "num_input_tokens_seen": 293947824, + "step": 2327 + }, + { + "epoch": 0.5969345219008529, + "loss": 1.002312183380127, + "loss_ce": 0.0003590636479202658, + "loss_iou": 0.46484375, + "loss_num": 0.01409912109375, + "loss_xval": 1.0, + "num_input_tokens_seen": 293947824, + "step": 2327 + }, + { + "epoch": 0.5971910472647983, + "grad_norm": 52.77204513549805, + "learning_rate": 5e-06, + "loss": 0.9899, + "num_input_tokens_seen": 294074516, + "step": 2328 + }, + { + "epoch": 0.5971910472647983, + "loss": 0.9081615209579468, + "loss_ce": 0.0014232808025553823, + "loss_iou": 0.423828125, + "loss_num": 0.0115966796875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 294074516, + "step": 2328 + }, + { + "epoch": 0.5974475726287437, + "grad_norm": 69.1312026977539, + "learning_rate": 5e-06, + "loss": 1.0554, + "num_input_tokens_seen": 294200936, + "step": 2329 + }, + { + "epoch": 0.5974475726287437, + "loss": 1.1638425588607788, + "loss_ce": 0.0061277663335204124, + "loss_iou": 0.50390625, + "loss_num": 0.029541015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 294200936, + "step": 2329 + }, + { + "epoch": 0.597704097992689, + "grad_norm": 51.3317756652832, + "learning_rate": 5e-06, + "loss": 1.096, + "num_input_tokens_seen": 294327568, + "step": 2330 + }, + { + "epoch": 0.597704097992689, + "loss": 1.011521816253662, + "loss_ce": 0.0007795936544425786, + "loss_iou": 0.46484375, + "loss_num": 0.0164794921875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 294327568, + "step": 2330 + }, + { + "epoch": 0.5979606233566344, + "grad_norm": 27.79184341430664, + "learning_rate": 5e-06, + "loss": 0.9504, + "num_input_tokens_seen": 294453068, + "step": 2331 + }, + { + "epoch": 0.5979606233566344, + "loss": 0.9353563785552979, + "loss_ce": 0.0002977780532091856, + "loss_iou": 0.423828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 294453068, + "step": 2331 + }, + { + "epoch": 0.5982171487205797, + "grad_norm": 42.96196365356445, + "learning_rate": 5e-06, + "loss": 0.9338, + "num_input_tokens_seen": 294578024, + "step": 2332 + }, + { + "epoch": 0.5982171487205797, + "loss": 0.9908304214477539, + "loss_ce": 0.002060859464108944, + "loss_iou": 0.44921875, + "loss_num": 0.01806640625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 294578024, + "step": 2332 + }, + { + "epoch": 0.5984736740845251, + "grad_norm": 32.76695251464844, + "learning_rate": 5e-06, + "loss": 1.0141, + "num_input_tokens_seen": 294703524, + "step": 2333 + }, + { + "epoch": 0.5984736740845251, + "loss": 0.966947078704834, + "loss_ce": 0.0043005263432860374, + "loss_iou": 0.453125, + "loss_num": 0.01123046875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 294703524, + "step": 2333 + }, + { + "epoch": 0.5987301994484705, + "grad_norm": 29.457813262939453, + "learning_rate": 5e-06, + "loss": 0.955, + "num_input_tokens_seen": 294829236, + "step": 2334 + }, + { + "epoch": 0.5987301994484705, + "loss": 0.8254052400588989, + "loss_ce": 0.001186518114991486, + "loss_iou": 0.390625, + "loss_num": 0.00848388671875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 294829236, + "step": 2334 + }, + { + "epoch": 0.5989867248124159, + "grad_norm": 32.25540542602539, + "learning_rate": 5e-06, + "loss": 0.9865, + "num_input_tokens_seen": 294955240, + "step": 2335 + }, + { + "epoch": 0.5989867248124159, + "loss": 1.013364553451538, + "loss_ce": 0.00164588273037225, + "loss_iou": 0.45703125, + "loss_num": 0.0194091796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 294955240, + "step": 2335 + }, + { + "epoch": 0.5992432501763612, + "grad_norm": 31.225496292114258, + "learning_rate": 5e-06, + "loss": 0.8986, + "num_input_tokens_seen": 295081980, + "step": 2336 + }, + { + "epoch": 0.5992432501763612, + "loss": 0.8935205936431885, + "loss_ce": 0.0014307815581560135, + "loss_iou": 0.421875, + "loss_num": 0.00982666015625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 295081980, + "step": 2336 + }, + { + "epoch": 0.5994997755403065, + "grad_norm": 41.831512451171875, + "learning_rate": 5e-06, + "loss": 1.04, + "num_input_tokens_seen": 295210592, + "step": 2337 + }, + { + "epoch": 0.5994997755403065, + "loss": 1.0793217420578003, + "loss_ce": 0.0016850350657477975, + "loss_iou": 0.48828125, + "loss_num": 0.01953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 295210592, + "step": 2337 + }, + { + "epoch": 0.5997563009042519, + "grad_norm": 43.993595123291016, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 295336888, + "step": 2338 + }, + { + "epoch": 0.5997563009042519, + "loss": 1.2753689289093018, + "loss_ce": 0.0009548969683237374, + "loss_iou": 0.59375, + "loss_num": 0.0172119140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 295336888, + "step": 2338 + }, + { + "epoch": 0.6000128262681973, + "grad_norm": 34.445648193359375, + "learning_rate": 5e-06, + "loss": 1.0403, + "num_input_tokens_seen": 295463044, + "step": 2339 + }, + { + "epoch": 0.6000128262681973, + "loss": 1.1691168546676636, + "loss_ce": 0.0026129393372684717, + "loss_iou": 0.53125, + "loss_num": 0.02001953125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 295463044, + "step": 2339 + }, + { + "epoch": 0.6002693516321427, + "grad_norm": 39.63389587402344, + "learning_rate": 5e-06, + "loss": 0.9238, + "num_input_tokens_seen": 295589576, + "step": 2340 + }, + { + "epoch": 0.6002693516321427, + "loss": 1.020135521888733, + "loss_ce": 0.0006042669410817325, + "loss_iou": 0.46875, + "loss_num": 0.0159912109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 295589576, + "step": 2340 + }, + { + "epoch": 0.600525876996088, + "grad_norm": 77.85896301269531, + "learning_rate": 5e-06, + "loss": 0.9247, + "num_input_tokens_seen": 295715628, + "step": 2341 + }, + { + "epoch": 0.600525876996088, + "loss": 1.034177303314209, + "loss_ce": 0.00024170703545678407, + "loss_iou": 0.486328125, + "loss_num": 0.0120849609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 295715628, + "step": 2341 + }, + { + "epoch": 0.6007824023600333, + "grad_norm": 60.31460189819336, + "learning_rate": 5e-06, + "loss": 1.0096, + "num_input_tokens_seen": 295843216, + "step": 2342 + }, + { + "epoch": 0.6007824023600333, + "loss": 1.0265511274337769, + "loss_ce": 0.00018396957602817565, + "loss_iou": 0.466796875, + "loss_num": 0.0185546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 295843216, + "step": 2342 + }, + { + "epoch": 0.6010389277239787, + "grad_norm": 43.02094268798828, + "learning_rate": 5e-06, + "loss": 0.9721, + "num_input_tokens_seen": 295970068, + "step": 2343 + }, + { + "epoch": 0.6010389277239787, + "loss": 0.840064525604248, + "loss_ce": 0.001685582334175706, + "loss_iou": 0.396484375, + "loss_num": 0.00933837890625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 295970068, + "step": 2343 + }, + { + "epoch": 0.6012954530879241, + "grad_norm": 30.822534561157227, + "learning_rate": 5e-06, + "loss": 0.9401, + "num_input_tokens_seen": 296095760, + "step": 2344 + }, + { + "epoch": 0.6012954530879241, + "loss": 0.858420729637146, + "loss_ce": 0.001975464401766658, + "loss_iou": 0.40234375, + "loss_num": 0.01055908203125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 296095760, + "step": 2344 + }, + { + "epoch": 0.6015519784518695, + "grad_norm": 31.357173919677734, + "learning_rate": 5e-06, + "loss": 0.9409, + "num_input_tokens_seen": 296221768, + "step": 2345 + }, + { + "epoch": 0.6015519784518695, + "loss": 0.9924489259719849, + "loss_ce": 0.004167623817920685, + "loss_iou": 0.44140625, + "loss_num": 0.020751953125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 296221768, + "step": 2345 + }, + { + "epoch": 0.6018085038158147, + "grad_norm": 36.41876983642578, + "learning_rate": 5e-06, + "loss": 1.0142, + "num_input_tokens_seen": 296348400, + "step": 2346 + }, + { + "epoch": 0.6018085038158147, + "loss": 0.9486322999000549, + "loss_ce": 0.0013666781596839428, + "loss_iou": 0.427734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 296348400, + "step": 2346 + }, + { + "epoch": 0.6020650291797601, + "grad_norm": 39.93068313598633, + "learning_rate": 5e-06, + "loss": 1.1697, + "num_input_tokens_seen": 296474540, + "step": 2347 + }, + { + "epoch": 0.6020650291797601, + "loss": 1.3424949645996094, + "loss_ce": 0.0006981391925364733, + "loss_iou": 0.59375, + "loss_num": 0.03125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 296474540, + "step": 2347 + }, + { + "epoch": 0.6023215545437055, + "grad_norm": 55.9630126953125, + "learning_rate": 5e-06, + "loss": 0.8897, + "num_input_tokens_seen": 296601192, + "step": 2348 + }, + { + "epoch": 0.6023215545437055, + "loss": 1.0129127502441406, + "loss_ce": 0.0016823092009872198, + "loss_iou": 0.455078125, + "loss_num": 0.02001953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 296601192, + "step": 2348 + }, + { + "epoch": 0.6025780799076509, + "grad_norm": 77.8394775390625, + "learning_rate": 5e-06, + "loss": 1.1688, + "num_input_tokens_seen": 296727160, + "step": 2349 + }, + { + "epoch": 0.6025780799076509, + "loss": 1.2625597715377808, + "loss_ce": 0.0008409414440393448, + "loss_iou": 0.57421875, + "loss_num": 0.02294921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 296727160, + "step": 2349 + }, + { + "epoch": 0.6028346052715963, + "grad_norm": 45.86054229736328, + "learning_rate": 5e-06, + "loss": 1.0527, + "num_input_tokens_seen": 296852516, + "step": 2350 + }, + { + "epoch": 0.6028346052715963, + "loss": 0.9557281732559204, + "loss_ce": 0.0016266198363155127, + "loss_iou": 0.4453125, + "loss_num": 0.01275634765625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 296852516, + "step": 2350 + }, + { + "epoch": 0.6030911306355415, + "grad_norm": 41.98226547241211, + "learning_rate": 5e-06, + "loss": 1.0907, + "num_input_tokens_seen": 296977948, + "step": 2351 + }, + { + "epoch": 0.6030911306355415, + "loss": 1.0982770919799805, + "loss_ce": 0.00037677702493965626, + "loss_iou": 0.484375, + "loss_num": 0.0255126953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 296977948, + "step": 2351 + }, + { + "epoch": 0.6033476559994869, + "grad_norm": 40.464263916015625, + "learning_rate": 5e-06, + "loss": 1.0874, + "num_input_tokens_seen": 297103912, + "step": 2352 + }, + { + "epoch": 0.6033476559994869, + "loss": 0.9715901613235474, + "loss_ce": 0.00039873551577329636, + "loss_iou": 0.44921875, + "loss_num": 0.0147705078125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 297103912, + "step": 2352 + }, + { + "epoch": 0.6036041813634323, + "grad_norm": 42.39037322998047, + "learning_rate": 5e-06, + "loss": 0.8749, + "num_input_tokens_seen": 297230496, + "step": 2353 + }, + { + "epoch": 0.6036041813634323, + "loss": 0.7754807472229004, + "loss_ce": 0.0005784243112429976, + "loss_iou": 0.373046875, + "loss_num": 0.005889892578125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 297230496, + "step": 2353 + }, + { + "epoch": 0.6038607067273777, + "grad_norm": 47.811119079589844, + "learning_rate": 5e-06, + "loss": 0.9165, + "num_input_tokens_seen": 297356444, + "step": 2354 + }, + { + "epoch": 0.6038607067273777, + "loss": 0.8914203643798828, + "loss_ce": 0.001283644000068307, + "loss_iou": 0.41015625, + "loss_num": 0.01416015625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 297356444, + "step": 2354 + }, + { + "epoch": 0.6041172320913231, + "grad_norm": 53.22089385986328, + "learning_rate": 5e-06, + "loss": 1.1213, + "num_input_tokens_seen": 297482620, + "step": 2355 + }, + { + "epoch": 0.6041172320913231, + "loss": 1.1584198474884033, + "loss_ce": 0.00607612170279026, + "loss_iou": 0.50390625, + "loss_num": 0.0289306640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 297482620, + "step": 2355 + }, + { + "epoch": 0.6043737574552683, + "grad_norm": 56.330204010009766, + "learning_rate": 5e-06, + "loss": 1.0271, + "num_input_tokens_seen": 297608764, + "step": 2356 + }, + { + "epoch": 0.6043737574552683, + "loss": 1.0233381986618042, + "loss_ce": 0.0003889874496962875, + "loss_iou": 0.478515625, + "loss_num": 0.012939453125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 297608764, + "step": 2356 + }, + { + "epoch": 0.6046302828192137, + "grad_norm": 62.2961540222168, + "learning_rate": 5e-06, + "loss": 1.0707, + "num_input_tokens_seen": 297734604, + "step": 2357 + }, + { + "epoch": 0.6046302828192137, + "loss": 1.034083366394043, + "loss_ce": 0.0028332697693258524, + "loss_iou": 0.48046875, + "loss_num": 0.01422119140625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 297734604, + "step": 2357 + }, + { + "epoch": 0.6048868081831591, + "grad_norm": 54.8119010925293, + "learning_rate": 5e-06, + "loss": 1.2108, + "num_input_tokens_seen": 297859612, + "step": 2358 + }, + { + "epoch": 0.6048868081831591, + "loss": 1.0146894454956055, + "loss_ce": 0.0015058487188071012, + "loss_iou": 0.4609375, + "loss_num": 0.0185546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 297859612, + "step": 2358 + }, + { + "epoch": 0.6051433335471045, + "grad_norm": 58.419010162353516, + "learning_rate": 5e-06, + "loss": 1.0189, + "num_input_tokens_seen": 297987700, + "step": 2359 + }, + { + "epoch": 0.6051433335471045, + "loss": 1.0325638055801392, + "loss_ce": 0.0018020968418568373, + "loss_iou": 0.478515625, + "loss_num": 0.0147705078125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 297987700, + "step": 2359 + }, + { + "epoch": 0.6053998589110499, + "grad_norm": 59.97882843017578, + "learning_rate": 5e-06, + "loss": 1.0537, + "num_input_tokens_seen": 298113972, + "step": 2360 + }, + { + "epoch": 0.6053998589110499, + "loss": 0.9035608172416687, + "loss_ce": 0.0007287994958460331, + "loss_iou": 0.4296875, + "loss_num": 0.00872802734375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 298113972, + "step": 2360 + }, + { + "epoch": 0.6056563842749952, + "grad_norm": 37.03329849243164, + "learning_rate": 5e-06, + "loss": 1.0774, + "num_input_tokens_seen": 298239040, + "step": 2361 + }, + { + "epoch": 0.6056563842749952, + "loss": 1.0830841064453125, + "loss_ce": 0.0010528210550546646, + "loss_iou": 0.5, + "loss_num": 0.0166015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 298239040, + "step": 2361 + }, + { + "epoch": 0.6059129096389405, + "grad_norm": 34.191184997558594, + "learning_rate": 5e-06, + "loss": 1.0617, + "num_input_tokens_seen": 298365180, + "step": 2362 + }, + { + "epoch": 0.6059129096389405, + "loss": 0.9471461772918701, + "loss_ce": 0.002810215577483177, + "loss_iou": 0.43359375, + "loss_num": 0.01507568359375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 298365180, + "step": 2362 + }, + { + "epoch": 0.6061694350028859, + "grad_norm": 59.82087707519531, + "learning_rate": 5e-06, + "loss": 1.0054, + "num_input_tokens_seen": 298490684, + "step": 2363 + }, + { + "epoch": 0.6061694350028859, + "loss": 1.012795090675354, + "loss_ce": 0.0005880154203623533, + "loss_iou": 0.478515625, + "loss_num": 0.01123046875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 298490684, + "step": 2363 + }, + { + "epoch": 0.6064259603668313, + "grad_norm": 55.13700866699219, + "learning_rate": 5e-06, + "loss": 1.1554, + "num_input_tokens_seen": 298615908, + "step": 2364 + }, + { + "epoch": 0.6064259603668313, + "loss": 1.108099102973938, + "loss_ce": 0.001165477791801095, + "loss_iou": 0.5, + "loss_num": 0.021240234375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 298615908, + "step": 2364 + }, + { + "epoch": 0.6066824857307767, + "grad_norm": 34.03069305419922, + "learning_rate": 5e-06, + "loss": 1.0088, + "num_input_tokens_seen": 298741656, + "step": 2365 + }, + { + "epoch": 0.6066824857307767, + "loss": 0.950843870639801, + "loss_ce": 0.0033341727685183287, + "loss_iou": 0.412109375, + "loss_num": 0.0247802734375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 298741656, + "step": 2365 + }, + { + "epoch": 0.606939011094722, + "grad_norm": 48.4284553527832, + "learning_rate": 5e-06, + "loss": 0.876, + "num_input_tokens_seen": 298867672, + "step": 2366 + }, + { + "epoch": 0.606939011094722, + "loss": 0.7349511981010437, + "loss_ce": 0.0030176120344549417, + "loss_iou": 0.34375, + "loss_num": 0.0087890625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 298867672, + "step": 2366 + }, + { + "epoch": 0.6071955364586673, + "grad_norm": 56.36427307128906, + "learning_rate": 5e-06, + "loss": 1.0335, + "num_input_tokens_seen": 298992812, + "step": 2367 + }, + { + "epoch": 0.6071955364586673, + "loss": 0.9154160022735596, + "loss_ce": 0.0018418596591800451, + "loss_iou": 0.435546875, + "loss_num": 0.008544921875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 298992812, + "step": 2367 + }, + { + "epoch": 0.6074520618226127, + "grad_norm": 45.70939636230469, + "learning_rate": 5e-06, + "loss": 0.9753, + "num_input_tokens_seen": 299119044, + "step": 2368 + }, + { + "epoch": 0.6074520618226127, + "loss": 0.7892074584960938, + "loss_ce": 0.00014495570212602615, + "loss_iou": 0.37890625, + "loss_num": 0.0059814453125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 299119044, + "step": 2368 + }, + { + "epoch": 0.6077085871865581, + "grad_norm": 42.5671272277832, + "learning_rate": 5e-06, + "loss": 0.9652, + "num_input_tokens_seen": 299246484, + "step": 2369 + }, + { + "epoch": 0.6077085871865581, + "loss": 0.8440212607383728, + "loss_ce": 0.000759531685616821, + "loss_iou": 0.404296875, + "loss_num": 0.006805419921875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 299246484, + "step": 2369 + }, + { + "epoch": 0.6079651125505035, + "grad_norm": 38.56281661987305, + "learning_rate": 5e-06, + "loss": 0.853, + "num_input_tokens_seen": 299372824, + "step": 2370 + }, + { + "epoch": 0.6079651125505035, + "loss": 0.8644446134567261, + "loss_ce": 0.00213992758654058, + "loss_iou": 0.390625, + "loss_num": 0.015869140625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 299372824, + "step": 2370 + }, + { + "epoch": 0.6082216379144488, + "grad_norm": 76.17793273925781, + "learning_rate": 5e-06, + "loss": 0.9996, + "num_input_tokens_seen": 299499148, + "step": 2371 + }, + { + "epoch": 0.6082216379144488, + "loss": 0.9436133503913879, + "loss_ce": 0.0007422103662975132, + "loss_iou": 0.435546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 299499148, + "step": 2371 + }, + { + "epoch": 0.6084781632783941, + "grad_norm": 47.47568130493164, + "learning_rate": 5e-06, + "loss": 1.0497, + "num_input_tokens_seen": 299625756, + "step": 2372 + }, + { + "epoch": 0.6084781632783941, + "loss": 0.9972690343856812, + "loss_ce": 0.0011752945138141513, + "loss_iou": 0.47265625, + "loss_num": 0.009765625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 299625756, + "step": 2372 + }, + { + "epoch": 0.6087346886423395, + "grad_norm": 19.725831985473633, + "learning_rate": 5e-06, + "loss": 0.9536, + "num_input_tokens_seen": 299752204, + "step": 2373 + }, + { + "epoch": 0.6087346886423395, + "loss": 1.2129980325698853, + "loss_ce": 0.0005957222892902792, + "loss_iou": 0.546875, + "loss_num": 0.02392578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 299752204, + "step": 2373 + }, + { + "epoch": 0.6089912140062849, + "grad_norm": 21.840272903442383, + "learning_rate": 5e-06, + "loss": 0.9352, + "num_input_tokens_seen": 299877208, + "step": 2374 + }, + { + "epoch": 0.6089912140062849, + "loss": 0.8713295459747314, + "loss_ce": 0.0004799108428414911, + "loss_iou": 0.40625, + "loss_num": 0.01190185546875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 299877208, + "step": 2374 + }, + { + "epoch": 0.6092477393702302, + "grad_norm": 29.53074073791504, + "learning_rate": 5e-06, + "loss": 0.9663, + "num_input_tokens_seen": 300004208, + "step": 2375 + }, + { + "epoch": 0.6092477393702302, + "loss": 0.8725451231002808, + "loss_ce": 0.00047486129915341735, + "loss_iou": 0.3984375, + "loss_num": 0.0147705078125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 300004208, + "step": 2375 + }, + { + "epoch": 0.6095042647341756, + "grad_norm": 41.67243576049805, + "learning_rate": 5e-06, + "loss": 0.921, + "num_input_tokens_seen": 300131384, + "step": 2376 + }, + { + "epoch": 0.6095042647341756, + "loss": 1.0623406171798706, + "loss_ce": 0.00032892514718696475, + "loss_iou": 0.478515625, + "loss_num": 0.0206298828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 300131384, + "step": 2376 + }, + { + "epoch": 0.6097607900981209, + "grad_norm": 77.45318603515625, + "learning_rate": 5e-06, + "loss": 0.9334, + "num_input_tokens_seen": 300257100, + "step": 2377 + }, + { + "epoch": 0.6097607900981209, + "loss": 0.9201186895370483, + "loss_ce": 0.0001968123106053099, + "loss_iou": 0.439453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 300257100, + "step": 2377 + }, + { + "epoch": 0.6100173154620663, + "grad_norm": 57.750789642333984, + "learning_rate": 5e-06, + "loss": 1.0497, + "num_input_tokens_seen": 300384080, + "step": 2378 + }, + { + "epoch": 0.6100173154620663, + "loss": 1.127772331237793, + "loss_ce": 0.00179577199742198, + "loss_iou": 0.5078125, + "loss_num": 0.02197265625, + "loss_xval": 1.125, + "num_input_tokens_seen": 300384080, + "step": 2378 + }, + { + "epoch": 0.6102738408260117, + "grad_norm": 30.240985870361328, + "learning_rate": 5e-06, + "loss": 0.963, + "num_input_tokens_seen": 300509952, + "step": 2379 + }, + { + "epoch": 0.6102738408260117, + "loss": 1.0444300174713135, + "loss_ce": 0.0019494625739753246, + "loss_iou": 0.4765625, + "loss_num": 0.0179443359375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 300509952, + "step": 2379 + }, + { + "epoch": 0.610530366189957, + "grad_norm": 31.022733688354492, + "learning_rate": 5e-06, + "loss": 1.0091, + "num_input_tokens_seen": 300636488, + "step": 2380 + }, + { + "epoch": 0.610530366189957, + "loss": 0.907547652721405, + "loss_ce": 0.0012976655270904303, + "loss_iou": 0.421875, + "loss_num": 0.012451171875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 300636488, + "step": 2380 + }, + { + "epoch": 0.6107868915539024, + "grad_norm": 32.02003860473633, + "learning_rate": 5e-06, + "loss": 0.8687, + "num_input_tokens_seen": 300763232, + "step": 2381 + }, + { + "epoch": 0.6107868915539024, + "loss": 0.6731455326080322, + "loss_ce": 0.0002939658588729799, + "loss_iou": 0.3203125, + "loss_num": 0.0068359375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 300763232, + "step": 2381 + }, + { + "epoch": 0.6110434169178478, + "grad_norm": 49.60429763793945, + "learning_rate": 5e-06, + "loss": 0.9664, + "num_input_tokens_seen": 300889692, + "step": 2382 + }, + { + "epoch": 0.6110434169178478, + "loss": 0.9684589505195618, + "loss_ce": 0.0006855035899206996, + "loss_iou": 0.443359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 300889692, + "step": 2382 + }, + { + "epoch": 0.6112999422817931, + "grad_norm": 72.6818618774414, + "learning_rate": 5e-06, + "loss": 1.1232, + "num_input_tokens_seen": 301016284, + "step": 2383 + }, + { + "epoch": 0.6112999422817931, + "loss": 0.9382489919662476, + "loss_ce": 0.00026069642626680434, + "loss_iou": 0.4453125, + "loss_num": 0.009521484375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 301016284, + "step": 2383 + }, + { + "epoch": 0.6115564676457385, + "grad_norm": 39.05830764770508, + "learning_rate": 5e-06, + "loss": 1.1238, + "num_input_tokens_seen": 301141104, + "step": 2384 + }, + { + "epoch": 0.6115564676457385, + "loss": 1.036285400390625, + "loss_ce": 0.00210572499781847, + "loss_iou": 0.46484375, + "loss_num": 0.0213623046875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 301141104, + "step": 2384 + }, + { + "epoch": 0.6118129930096838, + "grad_norm": 34.92748260498047, + "learning_rate": 5e-06, + "loss": 0.9151, + "num_input_tokens_seen": 301267172, + "step": 2385 + }, + { + "epoch": 0.6118129930096838, + "loss": 0.9949415922164917, + "loss_ce": 0.00031264187418855727, + "loss_iou": 0.474609375, + "loss_num": 0.00921630859375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 301267172, + "step": 2385 + }, + { + "epoch": 0.6120695183736292, + "grad_norm": 62.76207733154297, + "learning_rate": 5e-06, + "loss": 0.9627, + "num_input_tokens_seen": 301393388, + "step": 2386 + }, + { + "epoch": 0.6120695183736292, + "loss": 0.8363853693008423, + "loss_ce": 0.0009361720876768231, + "loss_iou": 0.384765625, + "loss_num": 0.01287841796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 301393388, + "step": 2386 + }, + { + "epoch": 0.6123260437375746, + "grad_norm": 50.688533782958984, + "learning_rate": 5e-06, + "loss": 0.9641, + "num_input_tokens_seen": 301520216, + "step": 2387 + }, + { + "epoch": 0.6123260437375746, + "loss": 0.9709949493408203, + "loss_ce": 0.003221467137336731, + "loss_iou": 0.455078125, + "loss_num": 0.01171875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 301520216, + "step": 2387 + }, + { + "epoch": 0.6125825691015199, + "grad_norm": 33.81632995605469, + "learning_rate": 5e-06, + "loss": 0.8985, + "num_input_tokens_seen": 301646352, + "step": 2388 + }, + { + "epoch": 0.6125825691015199, + "loss": 0.7463772296905518, + "loss_ce": 0.001748309121467173, + "loss_iou": 0.357421875, + "loss_num": 0.00592041015625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 301646352, + "step": 2388 + }, + { + "epoch": 0.6128390944654653, + "grad_norm": 36.596153259277344, + "learning_rate": 5e-06, + "loss": 1.0517, + "num_input_tokens_seen": 301772320, + "step": 2389 + }, + { + "epoch": 0.6128390944654653, + "loss": 0.8701430559158325, + "loss_ce": 0.00026998750399798155, + "loss_iou": 0.396484375, + "loss_num": 0.01519775390625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 301772320, + "step": 2389 + }, + { + "epoch": 0.6130956198294106, + "grad_norm": 46.07404708862305, + "learning_rate": 5e-06, + "loss": 0.8785, + "num_input_tokens_seen": 301899656, + "step": 2390 + }, + { + "epoch": 0.6130956198294106, + "loss": 0.8267738223075867, + "loss_ce": 0.000113645575765986, + "loss_iou": 0.39453125, + "loss_num": 0.007476806640625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 301899656, + "step": 2390 + }, + { + "epoch": 0.613352145193356, + "grad_norm": 41.02525329589844, + "learning_rate": 5e-06, + "loss": 0.9967, + "num_input_tokens_seen": 302026284, + "step": 2391 + }, + { + "epoch": 0.613352145193356, + "loss": 1.1226475238800049, + "loss_ce": 0.0005772191798314452, + "loss_iou": 0.5, + "loss_num": 0.024169921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 302026284, + "step": 2391 + }, + { + "epoch": 0.6136086705573014, + "grad_norm": 58.112178802490234, + "learning_rate": 5e-06, + "loss": 1.0821, + "num_input_tokens_seen": 302151976, + "step": 2392 + }, + { + "epoch": 0.6136086705573014, + "loss": 1.0785229206085205, + "loss_ce": 0.004304094705730677, + "loss_iou": 0.486328125, + "loss_num": 0.0201416015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 302151976, + "step": 2392 + }, + { + "epoch": 0.6138651959212467, + "grad_norm": 62.90297317504883, + "learning_rate": 5e-06, + "loss": 1.0704, + "num_input_tokens_seen": 302278356, + "step": 2393 + }, + { + "epoch": 0.6138651959212467, + "loss": 1.2281768321990967, + "loss_ce": 0.0021026916801929474, + "loss_iou": 0.55078125, + "loss_num": 0.0255126953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 302278356, + "step": 2393 + }, + { + "epoch": 0.614121721285192, + "grad_norm": 40.189022064208984, + "learning_rate": 5e-06, + "loss": 0.9252, + "num_input_tokens_seen": 302405240, + "step": 2394 + }, + { + "epoch": 0.614121721285192, + "loss": 0.9091630578041077, + "loss_ce": 0.0004716459661722183, + "loss_iou": 0.421875, + "loss_num": 0.01287841796875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 302405240, + "step": 2394 + }, + { + "epoch": 0.6143782466491374, + "grad_norm": 52.83066940307617, + "learning_rate": 5e-06, + "loss": 1.0158, + "num_input_tokens_seen": 302531352, + "step": 2395 + }, + { + "epoch": 0.6143782466491374, + "loss": 0.9998695850372314, + "loss_ce": 0.0008461041725240648, + "loss_iou": 0.470703125, + "loss_num": 0.011474609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 302531352, + "step": 2395 + }, + { + "epoch": 0.6146347720130828, + "grad_norm": 56.609371185302734, + "learning_rate": 5e-06, + "loss": 0.8816, + "num_input_tokens_seen": 302657008, + "step": 2396 + }, + { + "epoch": 0.6146347720130828, + "loss": 1.058960199356079, + "loss_ce": 0.0008547369507141411, + "loss_iou": 0.474609375, + "loss_num": 0.0213623046875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 302657008, + "step": 2396 + }, + { + "epoch": 0.6148912973770282, + "grad_norm": 53.55632400512695, + "learning_rate": 5e-06, + "loss": 1.0061, + "num_input_tokens_seen": 302783840, + "step": 2397 + }, + { + "epoch": 0.6148912973770282, + "loss": 1.0464787483215332, + "loss_ce": 0.0005804004613310099, + "loss_iou": 0.482421875, + "loss_num": 0.0157470703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 302783840, + "step": 2397 + }, + { + "epoch": 0.6151478227409735, + "grad_norm": 35.964149475097656, + "learning_rate": 5e-06, + "loss": 0.9173, + "num_input_tokens_seen": 302909100, + "step": 2398 + }, + { + "epoch": 0.6151478227409735, + "loss": 0.8786402940750122, + "loss_ce": 0.0016871325206011534, + "loss_iou": 0.40234375, + "loss_num": 0.01434326171875, + "loss_xval": 0.875, + "num_input_tokens_seen": 302909100, + "step": 2398 + }, + { + "epoch": 0.6154043481049188, + "grad_norm": 176.68228149414062, + "learning_rate": 5e-06, + "loss": 0.882, + "num_input_tokens_seen": 303035200, + "step": 2399 + }, + { + "epoch": 0.6154043481049188, + "loss": 0.9574973583221436, + "loss_ce": 0.0031516484450548887, + "loss_iou": 0.435546875, + "loss_num": 0.016845703125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 303035200, + "step": 2399 + }, + { + "epoch": 0.6156608734688642, + "grad_norm": 44.34334945678711, + "learning_rate": 5e-06, + "loss": 0.8975, + "num_input_tokens_seen": 303161796, + "step": 2400 + }, + { + "epoch": 0.6156608734688642, + "loss": 0.9140017032623291, + "loss_ce": 0.001159908133558929, + "loss_iou": 0.4296875, + "loss_num": 0.01068115234375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 303161796, + "step": 2400 + }, + { + "epoch": 0.6159173988328096, + "grad_norm": 33.105735778808594, + "learning_rate": 5e-06, + "loss": 0.9074, + "num_input_tokens_seen": 303287048, + "step": 2401 + }, + { + "epoch": 0.6159173988328096, + "loss": 0.8978307247161865, + "loss_ce": 0.0008580397116020322, + "loss_iou": 0.427734375, + "loss_num": 0.00811767578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 303287048, + "step": 2401 + }, + { + "epoch": 0.616173924196755, + "grad_norm": 22.148101806640625, + "learning_rate": 5e-06, + "loss": 0.8988, + "num_input_tokens_seen": 303413252, + "step": 2402 + }, + { + "epoch": 0.616173924196755, + "loss": 0.8386335372924805, + "loss_ce": 0.0026960265822708607, + "loss_iou": 0.388671875, + "loss_num": 0.0113525390625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 303413252, + "step": 2402 + }, + { + "epoch": 0.6164304495607004, + "grad_norm": 39.31587600708008, + "learning_rate": 5e-06, + "loss": 0.908, + "num_input_tokens_seen": 303539212, + "step": 2403 + }, + { + "epoch": 0.6164304495607004, + "loss": 1.0001246929168701, + "loss_ce": 0.0015895850956439972, + "loss_iou": 0.45703125, + "loss_num": 0.017333984375, + "loss_xval": 1.0, + "num_input_tokens_seen": 303539212, + "step": 2403 + }, + { + "epoch": 0.6166869749246456, + "grad_norm": 56.37080764770508, + "learning_rate": 5e-06, + "loss": 1.0032, + "num_input_tokens_seen": 303665312, + "step": 2404 + }, + { + "epoch": 0.6166869749246456, + "loss": 0.9108952879905701, + "loss_ce": 0.00025074605946429074, + "loss_iou": 0.427734375, + "loss_num": 0.0113525390625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 303665312, + "step": 2404 + }, + { + "epoch": 0.616943500288591, + "grad_norm": 42.014286041259766, + "learning_rate": 5e-06, + "loss": 1.0014, + "num_input_tokens_seen": 303790628, + "step": 2405 + }, + { + "epoch": 0.616943500288591, + "loss": 1.0262646675109863, + "loss_ce": 0.0003857887350022793, + "loss_iou": 0.458984375, + "loss_num": 0.02197265625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 303790628, + "step": 2405 + }, + { + "epoch": 0.6172000256525364, + "grad_norm": 43.7136344909668, + "learning_rate": 5e-06, + "loss": 0.9153, + "num_input_tokens_seen": 303918504, + "step": 2406 + }, + { + "epoch": 0.6172000256525364, + "loss": 0.9639586210250854, + "loss_ce": 0.002044574823230505, + "loss_iou": 0.4453125, + "loss_num": 0.014404296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 303918504, + "step": 2406 + }, + { + "epoch": 0.6174565510164818, + "grad_norm": 62.77971267700195, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 304044668, + "step": 2407 + }, + { + "epoch": 0.6174565510164818, + "loss": 1.000226616859436, + "loss_ce": 0.0002266664378112182, + "loss_iou": 0.48046875, + "loss_num": 0.007720947265625, + "loss_xval": 1.0, + "num_input_tokens_seen": 304044668, + "step": 2407 + }, + { + "epoch": 0.6177130763804272, + "grad_norm": 72.70613861083984, + "learning_rate": 5e-06, + "loss": 1.025, + "num_input_tokens_seen": 304170828, + "step": 2408 + }, + { + "epoch": 0.6177130763804272, + "loss": 0.994817316532135, + "loss_ce": 0.0036063911393284798, + "loss_iou": 0.458984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 304170828, + "step": 2408 + }, + { + "epoch": 0.6179696017443724, + "grad_norm": 51.835689544677734, + "learning_rate": 5e-06, + "loss": 0.9831, + "num_input_tokens_seen": 304296324, + "step": 2409 + }, + { + "epoch": 0.6179696017443724, + "loss": 0.9526968598365784, + "loss_ce": 0.0044547030702233315, + "loss_iou": 0.4453125, + "loss_num": 0.01141357421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 304296324, + "step": 2409 + }, + { + "epoch": 0.6182261271083178, + "grad_norm": 34.60609817504883, + "learning_rate": 5e-06, + "loss": 1.0443, + "num_input_tokens_seen": 304422616, + "step": 2410 + }, + { + "epoch": 0.6182261271083178, + "loss": 1.059563159942627, + "loss_ce": 0.00023700429301243275, + "loss_iou": 0.4609375, + "loss_num": 0.02734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 304422616, + "step": 2410 + }, + { + "epoch": 0.6184826524722632, + "grad_norm": 46.0897331237793, + "learning_rate": 5e-06, + "loss": 1.0161, + "num_input_tokens_seen": 304548808, + "step": 2411 + }, + { + "epoch": 0.6184826524722632, + "loss": 0.9240692853927612, + "loss_ce": 0.0017059547826647758, + "loss_iou": 0.43359375, + "loss_num": 0.0111083984375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 304548808, + "step": 2411 + }, + { + "epoch": 0.6187391778362086, + "grad_norm": 63.52092742919922, + "learning_rate": 5e-06, + "loss": 1.0172, + "num_input_tokens_seen": 304675224, + "step": 2412 + }, + { + "epoch": 0.6187391778362086, + "loss": 1.0817303657531738, + "loss_ce": 0.0016522924415767193, + "loss_iou": 0.498046875, + "loss_num": 0.016845703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 304675224, + "step": 2412 + }, + { + "epoch": 0.618995703200154, + "grad_norm": 56.15546417236328, + "learning_rate": 5e-06, + "loss": 0.9623, + "num_input_tokens_seen": 304801200, + "step": 2413 + }, + { + "epoch": 0.618995703200154, + "loss": 0.8813201189041138, + "loss_ce": 0.00046079274034127593, + "loss_iou": 0.423828125, + "loss_num": 0.0069580078125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 304801200, + "step": 2413 + }, + { + "epoch": 0.6192522285640992, + "grad_norm": 49.2623176574707, + "learning_rate": 5e-06, + "loss": 1.0095, + "num_input_tokens_seen": 304927724, + "step": 2414 + }, + { + "epoch": 0.6192522285640992, + "loss": 0.9392759799957275, + "loss_ce": 0.00031112079159356654, + "loss_iou": 0.44140625, + "loss_num": 0.01129150390625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 304927724, + "step": 2414 + }, + { + "epoch": 0.6195087539280446, + "grad_norm": 52.596336364746094, + "learning_rate": 5e-06, + "loss": 0.9432, + "num_input_tokens_seen": 305053968, + "step": 2415 + }, + { + "epoch": 0.6195087539280446, + "loss": 0.9761803150177002, + "loss_ce": 0.0005942760617472231, + "loss_iou": 0.462890625, + "loss_num": 0.0101318359375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 305053968, + "step": 2415 + }, + { + "epoch": 0.61976527929199, + "grad_norm": 53.8277702331543, + "learning_rate": 5e-06, + "loss": 0.9526, + "num_input_tokens_seen": 305180416, + "step": 2416 + }, + { + "epoch": 0.61976527929199, + "loss": 1.0766226053237915, + "loss_ce": 0.0009390468476340175, + "loss_iou": 0.490234375, + "loss_num": 0.0191650390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 305180416, + "step": 2416 + }, + { + "epoch": 0.6200218046559354, + "grad_norm": 49.876991271972656, + "learning_rate": 5e-06, + "loss": 0.8889, + "num_input_tokens_seen": 305306560, + "step": 2417 + }, + { + "epoch": 0.6200218046559354, + "loss": 0.787087082862854, + "loss_ce": 0.00241913553327322, + "loss_iou": 0.3671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 305306560, + "step": 2417 + }, + { + "epoch": 0.6202783300198808, + "grad_norm": 42.91297912597656, + "learning_rate": 5e-06, + "loss": 0.902, + "num_input_tokens_seen": 305433588, + "step": 2418 + }, + { + "epoch": 0.6202783300198808, + "loss": 0.9944955706596375, + "loss_ce": 0.0013314527459442616, + "loss_iou": 0.458984375, + "loss_num": 0.0152587890625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 305433588, + "step": 2418 + }, + { + "epoch": 0.620534855383826, + "grad_norm": 40.49419403076172, + "learning_rate": 5e-06, + "loss": 0.8986, + "num_input_tokens_seen": 305559180, + "step": 2419 + }, + { + "epoch": 0.620534855383826, + "loss": 0.9334796667098999, + "loss_ce": 0.002571461256593466, + "loss_iou": 0.431640625, + "loss_num": 0.013671875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 305559180, + "step": 2419 + }, + { + "epoch": 0.6207913807477714, + "grad_norm": 36.625492095947266, + "learning_rate": 5e-06, + "loss": 1.0845, + "num_input_tokens_seen": 305685372, + "step": 2420 + }, + { + "epoch": 0.6207913807477714, + "loss": 1.0503628253936768, + "loss_ce": 0.00422025378793478, + "loss_iou": 0.47265625, + "loss_num": 0.0201416015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 305685372, + "step": 2420 + }, + { + "epoch": 0.6210479061117168, + "grad_norm": 45.93886184692383, + "learning_rate": 5e-06, + "loss": 0.9054, + "num_input_tokens_seen": 305811652, + "step": 2421 + }, + { + "epoch": 0.6210479061117168, + "loss": 0.8115324974060059, + "loss_ce": 0.0004973037866875529, + "loss_iou": 0.384765625, + "loss_num": 0.0081787109375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 305811652, + "step": 2421 + }, + { + "epoch": 0.6213044314756622, + "grad_norm": 67.83499145507812, + "learning_rate": 5e-06, + "loss": 1.0028, + "num_input_tokens_seen": 305938484, + "step": 2422 + }, + { + "epoch": 0.6213044314756622, + "loss": 1.1667850017547607, + "loss_ce": 0.0027225620578974485, + "loss_iou": 0.5234375, + "loss_num": 0.023681640625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 305938484, + "step": 2422 + }, + { + "epoch": 0.6215609568396075, + "grad_norm": 54.7205696105957, + "learning_rate": 5e-06, + "loss": 0.9993, + "num_input_tokens_seen": 306065332, + "step": 2423 + }, + { + "epoch": 0.6215609568396075, + "loss": 1.170240044593811, + "loss_ce": 0.004224344156682491, + "loss_iou": 0.51953125, + "loss_num": 0.0247802734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 306065332, + "step": 2423 + }, + { + "epoch": 0.6218174822035529, + "grad_norm": 43.31586456298828, + "learning_rate": 5e-06, + "loss": 0.911, + "num_input_tokens_seen": 306191644, + "step": 2424 + }, + { + "epoch": 0.6218174822035529, + "loss": 0.8625841736793518, + "loss_ce": 0.001256045768968761, + "loss_iou": 0.404296875, + "loss_num": 0.01068115234375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 306191644, + "step": 2424 + }, + { + "epoch": 0.6220740075674982, + "grad_norm": 24.33938980102539, + "learning_rate": 5e-06, + "loss": 0.9781, + "num_input_tokens_seen": 306317552, + "step": 2425 + }, + { + "epoch": 0.6220740075674982, + "loss": 0.9842906594276428, + "loss_ce": 0.0028453469276428223, + "loss_iou": 0.45703125, + "loss_num": 0.01385498046875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 306317552, + "step": 2425 + }, + { + "epoch": 0.6223305329314436, + "grad_norm": 31.525400161743164, + "learning_rate": 5e-06, + "loss": 0.9093, + "num_input_tokens_seen": 306444112, + "step": 2426 + }, + { + "epoch": 0.6223305329314436, + "loss": 0.8607596158981323, + "loss_ce": 0.0008962840074673295, + "loss_iou": 0.404296875, + "loss_num": 0.01019287109375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 306444112, + "step": 2426 + }, + { + "epoch": 0.622587058295389, + "grad_norm": 42.063987731933594, + "learning_rate": 5e-06, + "loss": 0.9972, + "num_input_tokens_seen": 306570188, + "step": 2427 + }, + { + "epoch": 0.622587058295389, + "loss": 1.0734953880310059, + "loss_ce": 0.001229823217727244, + "loss_iou": 0.46484375, + "loss_num": 0.0289306640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 306570188, + "step": 2427 + }, + { + "epoch": 0.6228435836593343, + "grad_norm": 56.7160530090332, + "learning_rate": 5e-06, + "loss": 1.0106, + "num_input_tokens_seen": 306696692, + "step": 2428 + }, + { + "epoch": 0.6228435836593343, + "loss": 1.3337113857269287, + "loss_ce": 0.0011919128010049462, + "loss_iou": 0.609375, + "loss_num": 0.0216064453125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 306696692, + "step": 2428 + }, + { + "epoch": 0.6231001090232797, + "grad_norm": 41.77696228027344, + "learning_rate": 5e-06, + "loss": 1.0593, + "num_input_tokens_seen": 306822308, + "step": 2429 + }, + { + "epoch": 0.6231001090232797, + "loss": 1.0048420429229736, + "loss_ce": 0.004353751428425312, + "loss_iou": 0.439453125, + "loss_num": 0.0240478515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 306822308, + "step": 2429 + }, + { + "epoch": 0.623356634387225, + "grad_norm": 26.068958282470703, + "learning_rate": 5e-06, + "loss": 1.0531, + "num_input_tokens_seen": 306949272, + "step": 2430 + }, + { + "epoch": 0.623356634387225, + "loss": 1.3159947395324707, + "loss_ce": 0.005447814241051674, + "loss_iou": 0.58203125, + "loss_num": 0.0284423828125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 306949272, + "step": 2430 + }, + { + "epoch": 0.6236131597511704, + "grad_norm": 54.508235931396484, + "learning_rate": 5e-06, + "loss": 0.865, + "num_input_tokens_seen": 307075588, + "step": 2431 + }, + { + "epoch": 0.6236131597511704, + "loss": 0.8327459692955017, + "loss_ce": 0.0046209916472435, + "loss_iou": 0.388671875, + "loss_num": 0.01031494140625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 307075588, + "step": 2431 + }, + { + "epoch": 0.6238696851151158, + "grad_norm": 53.445064544677734, + "learning_rate": 5e-06, + "loss": 0.9169, + "num_input_tokens_seen": 307202460, + "step": 2432 + }, + { + "epoch": 0.6238696851151158, + "loss": 0.9381344318389893, + "loss_ce": 0.00014618027489632368, + "loss_iou": 0.4453125, + "loss_num": 0.009521484375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 307202460, + "step": 2432 + }, + { + "epoch": 0.6241262104790611, + "grad_norm": 46.51102066040039, + "learning_rate": 5e-06, + "loss": 1.0067, + "num_input_tokens_seen": 307329056, + "step": 2433 + }, + { + "epoch": 0.6241262104790611, + "loss": 0.9616233110427856, + "loss_ce": 0.0009299663361161947, + "loss_iou": 0.427734375, + "loss_num": 0.02099609375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 307329056, + "step": 2433 + }, + { + "epoch": 0.6243827358430065, + "grad_norm": 47.65647888183594, + "learning_rate": 5e-06, + "loss": 0.9523, + "num_input_tokens_seen": 307456060, + "step": 2434 + }, + { + "epoch": 0.6243827358430065, + "loss": 0.9719138145446777, + "loss_ce": 0.0007223976426757872, + "loss_iou": 0.439453125, + "loss_num": 0.0185546875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 307456060, + "step": 2434 + }, + { + "epoch": 0.6246392612069518, + "grad_norm": 56.969383239746094, + "learning_rate": 5e-06, + "loss": 1.0714, + "num_input_tokens_seen": 307581284, + "step": 2435 + }, + { + "epoch": 0.6246392612069518, + "loss": 1.0677063465118408, + "loss_ce": 0.0008119025733321905, + "loss_iou": 0.486328125, + "loss_num": 0.01904296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 307581284, + "step": 2435 + }, + { + "epoch": 0.6248957865708972, + "grad_norm": 53.54359817504883, + "learning_rate": 5e-06, + "loss": 1.0061, + "num_input_tokens_seen": 307707380, + "step": 2436 + }, + { + "epoch": 0.6248957865708972, + "loss": 0.8166592121124268, + "loss_ce": 0.0002529309713281691, + "loss_iou": 0.388671875, + "loss_num": 0.0074462890625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 307707380, + "step": 2436 + }, + { + "epoch": 0.6251523119348426, + "grad_norm": 53.8261604309082, + "learning_rate": 5e-06, + "loss": 0.9745, + "num_input_tokens_seen": 307833580, + "step": 2437 + }, + { + "epoch": 0.6251523119348426, + "loss": 1.0104583501815796, + "loss_ce": 0.0011809748830273747, + "loss_iou": 0.4765625, + "loss_num": 0.01092529296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 307833580, + "step": 2437 + }, + { + "epoch": 0.6254088372987879, + "grad_norm": 40.563655853271484, + "learning_rate": 5e-06, + "loss": 1.0593, + "num_input_tokens_seen": 307959132, + "step": 2438 + }, + { + "epoch": 0.6254088372987879, + "loss": 0.8795284032821655, + "loss_ce": 0.0025752554647624493, + "loss_iou": 0.416015625, + "loss_num": 0.0086669921875, + "loss_xval": 0.875, + "num_input_tokens_seen": 307959132, + "step": 2438 + }, + { + "epoch": 0.6256653626627333, + "grad_norm": 18.454668045043945, + "learning_rate": 5e-06, + "loss": 0.929, + "num_input_tokens_seen": 308085764, + "step": 2439 + }, + { + "epoch": 0.6256653626627333, + "loss": 0.8199399709701538, + "loss_ce": 0.000603993539698422, + "loss_iou": 0.384765625, + "loss_num": 0.0098876953125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 308085764, + "step": 2439 + }, + { + "epoch": 0.6259218880266786, + "grad_norm": 39.20863723754883, + "learning_rate": 5e-06, + "loss": 0.9786, + "num_input_tokens_seen": 308211724, + "step": 2440 + }, + { + "epoch": 0.6259218880266786, + "loss": 0.9764639139175415, + "loss_ce": 0.00038962927646934986, + "loss_iou": 0.443359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 308211724, + "step": 2440 + }, + { + "epoch": 0.626178413390624, + "grad_norm": 45.93009948730469, + "learning_rate": 5e-06, + "loss": 0.9891, + "num_input_tokens_seen": 308337612, + "step": 2441 + }, + { + "epoch": 0.626178413390624, + "loss": 1.0349873304367065, + "loss_ce": 0.00031937434687279165, + "loss_iou": 0.46875, + "loss_num": 0.0194091796875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 308337612, + "step": 2441 + }, + { + "epoch": 0.6264349387545693, + "grad_norm": 63.89091873168945, + "learning_rate": 5e-06, + "loss": 0.9449, + "num_input_tokens_seen": 308465480, + "step": 2442 + }, + { + "epoch": 0.6264349387545693, + "loss": 0.8075037002563477, + "loss_ce": 0.000863034394569695, + "loss_iou": 0.375, + "loss_num": 0.0111083984375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 308465480, + "step": 2442 + }, + { + "epoch": 0.6266914641185147, + "grad_norm": 47.575706481933594, + "learning_rate": 5e-06, + "loss": 1.0394, + "num_input_tokens_seen": 308591852, + "step": 2443 + }, + { + "epoch": 0.6266914641185147, + "loss": 1.0176851749420166, + "loss_ce": 0.00010702457802835852, + "loss_iou": 0.470703125, + "loss_num": 0.01556396484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 308591852, + "step": 2443 + }, + { + "epoch": 0.6269479894824601, + "grad_norm": 33.7256965637207, + "learning_rate": 5e-06, + "loss": 0.8833, + "num_input_tokens_seen": 308718332, + "step": 2444 + }, + { + "epoch": 0.6269479894824601, + "loss": 1.031751275062561, + "loss_ce": 0.0014778072945773602, + "loss_iou": 0.478515625, + "loss_num": 0.014892578125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 308718332, + "step": 2444 + }, + { + "epoch": 0.6272045148464055, + "grad_norm": 47.436580657958984, + "learning_rate": 5e-06, + "loss": 0.9596, + "num_input_tokens_seen": 308844500, + "step": 2445 + }, + { + "epoch": 0.6272045148464055, + "loss": 0.8321675062179565, + "loss_ce": 0.00013626072905026376, + "loss_iou": 0.3828125, + "loss_num": 0.0130615234375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 308844500, + "step": 2445 + }, + { + "epoch": 0.6274610402103508, + "grad_norm": 63.93471908569336, + "learning_rate": 5e-06, + "loss": 1.0999, + "num_input_tokens_seen": 308970852, + "step": 2446 + }, + { + "epoch": 0.6274610402103508, + "loss": 1.1101393699645996, + "loss_ce": 0.0012526819482445717, + "loss_iou": 0.4921875, + "loss_num": 0.0245361328125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 308970852, + "step": 2446 + }, + { + "epoch": 0.6277175655742961, + "grad_norm": 48.979671478271484, + "learning_rate": 5e-06, + "loss": 1.0583, + "num_input_tokens_seen": 309096132, + "step": 2447 + }, + { + "epoch": 0.6277175655742961, + "loss": 1.1795204877853394, + "loss_ce": 0.0008094889344647527, + "loss_iou": 0.5390625, + "loss_num": 0.0203857421875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 309096132, + "step": 2447 + }, + { + "epoch": 0.6279740909382415, + "grad_norm": 49.110591888427734, + "learning_rate": 5e-06, + "loss": 0.9865, + "num_input_tokens_seen": 309222228, + "step": 2448 + }, + { + "epoch": 0.6279740909382415, + "loss": 0.9869959354400635, + "loss_ce": 0.0001795334101188928, + "loss_iou": 0.458984375, + "loss_num": 0.01397705078125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 309222228, + "step": 2448 + }, + { + "epoch": 0.6282306163021869, + "grad_norm": 53.28964614868164, + "learning_rate": 5e-06, + "loss": 1.1123, + "num_input_tokens_seen": 309348584, + "step": 2449 + }, + { + "epoch": 0.6282306163021869, + "loss": 1.3824735879898071, + "loss_ce": 0.0025907293893396854, + "loss_iou": 0.61328125, + "loss_num": 0.030517578125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 309348584, + "step": 2449 + }, + { + "epoch": 0.6284871416661323, + "grad_norm": 69.24656677246094, + "learning_rate": 5e-06, + "loss": 0.8532, + "num_input_tokens_seen": 309474956, + "step": 2450 + }, + { + "epoch": 0.6284871416661323, + "loss": 0.8674707412719727, + "loss_ce": 0.007119216024875641, + "loss_iou": 0.4140625, + "loss_num": 0.0067138671875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 309474956, + "step": 2450 + }, + { + "epoch": 0.6287436670300776, + "grad_norm": 51.120059967041016, + "learning_rate": 5e-06, + "loss": 0.9339, + "num_input_tokens_seen": 309601740, + "step": 2451 + }, + { + "epoch": 0.6287436670300776, + "loss": 0.9942096471786499, + "loss_ce": 0.0015338478842750192, + "loss_iou": 0.458984375, + "loss_num": 0.01519775390625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 309601740, + "step": 2451 + }, + { + "epoch": 0.6290001923940229, + "grad_norm": 20.52977180480957, + "learning_rate": 5e-06, + "loss": 0.806, + "num_input_tokens_seen": 309728228, + "step": 2452 + }, + { + "epoch": 0.6290001923940229, + "loss": 0.6880981922149658, + "loss_ce": 0.0010865030344575644, + "loss_iou": 0.326171875, + "loss_num": 0.0068359375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 309728228, + "step": 2452 + }, + { + "epoch": 0.6292567177579683, + "grad_norm": 38.78668212890625, + "learning_rate": 5e-06, + "loss": 0.9607, + "num_input_tokens_seen": 309855364, + "step": 2453 + }, + { + "epoch": 0.6292567177579683, + "loss": 0.8277446031570435, + "loss_ce": 0.00010788270446937531, + "loss_iou": 0.388671875, + "loss_num": 0.0103759765625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 309855364, + "step": 2453 + }, + { + "epoch": 0.6295132431219137, + "grad_norm": 51.809242248535156, + "learning_rate": 5e-06, + "loss": 0.9658, + "num_input_tokens_seen": 309982544, + "step": 2454 + }, + { + "epoch": 0.6295132431219137, + "loss": 0.9454361200332642, + "loss_ce": 0.002076731063425541, + "loss_iou": 0.4375, + "loss_num": 0.01397705078125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 309982544, + "step": 2454 + }, + { + "epoch": 0.6297697684858591, + "grad_norm": 59.76614761352539, + "learning_rate": 5e-06, + "loss": 1.0103, + "num_input_tokens_seen": 310109036, + "step": 2455 + }, + { + "epoch": 0.6297697684858591, + "loss": 1.0230460166931152, + "loss_ce": 0.001561575336381793, + "loss_iou": 0.470703125, + "loss_num": 0.015625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 310109036, + "step": 2455 + }, + { + "epoch": 0.6300262938498044, + "grad_norm": 44.28806686401367, + "learning_rate": 5e-06, + "loss": 1.0503, + "num_input_tokens_seen": 310234712, + "step": 2456 + }, + { + "epoch": 0.6300262938498044, + "loss": 1.1191718578338623, + "loss_ce": 0.004425740335136652, + "loss_iou": 0.515625, + "loss_num": 0.0172119140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 310234712, + "step": 2456 + }, + { + "epoch": 0.6302828192137497, + "grad_norm": 36.6906623840332, + "learning_rate": 5e-06, + "loss": 0.9416, + "num_input_tokens_seen": 310361988, + "step": 2457 + }, + { + "epoch": 0.6302828192137497, + "loss": 1.0001568794250488, + "loss_ce": 0.0006451534572988749, + "loss_iou": 0.462890625, + "loss_num": 0.01495361328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 310361988, + "step": 2457 + }, + { + "epoch": 0.6305393445776951, + "grad_norm": 27.16981315612793, + "learning_rate": 5e-06, + "loss": 1.0039, + "num_input_tokens_seen": 310487576, + "step": 2458 + }, + { + "epoch": 0.6305393445776951, + "loss": 1.0839290618896484, + "loss_ce": 0.0023861126974225044, + "loss_iou": 0.482421875, + "loss_num": 0.0228271484375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 310487576, + "step": 2458 + }, + { + "epoch": 0.6307958699416405, + "grad_norm": 58.13850784301758, + "learning_rate": 5e-06, + "loss": 0.9212, + "num_input_tokens_seen": 310613696, + "step": 2459 + }, + { + "epoch": 0.6307958699416405, + "loss": 1.01338529586792, + "loss_ce": 0.0026430024299770594, + "loss_iou": 0.453125, + "loss_num": 0.021240234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 310613696, + "step": 2459 + }, + { + "epoch": 0.6310523953055859, + "grad_norm": 37.667747497558594, + "learning_rate": 5e-06, + "loss": 0.9937, + "num_input_tokens_seen": 310739368, + "step": 2460 + }, + { + "epoch": 0.6310523953055859, + "loss": 0.9947527647018433, + "loss_ce": 0.0006121775368228555, + "loss_iou": 0.44921875, + "loss_num": 0.0189208984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 310739368, + "step": 2460 + }, + { + "epoch": 0.6313089206695311, + "grad_norm": 30.690032958984375, + "learning_rate": 5e-06, + "loss": 0.8858, + "num_input_tokens_seen": 310865468, + "step": 2461 + }, + { + "epoch": 0.6313089206695311, + "loss": 1.0220526456832886, + "loss_ce": 0.0010564837139099836, + "loss_iou": 0.466796875, + "loss_num": 0.01708984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 310865468, + "step": 2461 + }, + { + "epoch": 0.6315654460334765, + "grad_norm": 36.148216247558594, + "learning_rate": 5e-06, + "loss": 0.9222, + "num_input_tokens_seen": 310991304, + "step": 2462 + }, + { + "epoch": 0.6315654460334765, + "loss": 1.041717529296875, + "loss_ce": 0.00021361219114623964, + "loss_iou": 0.4765625, + "loss_num": 0.01806640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 310991304, + "step": 2462 + }, + { + "epoch": 0.6318219713974219, + "grad_norm": 73.86784362792969, + "learning_rate": 5e-06, + "loss": 1.0617, + "num_input_tokens_seen": 311118388, + "step": 2463 + }, + { + "epoch": 0.6318219713974219, + "loss": 1.0863783359527588, + "loss_ce": 0.002393897157162428, + "loss_iou": 0.5, + "loss_num": 0.0162353515625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 311118388, + "step": 2463 + }, + { + "epoch": 0.6320784967613673, + "grad_norm": 52.54774856567383, + "learning_rate": 5e-06, + "loss": 1.152, + "num_input_tokens_seen": 311245380, + "step": 2464 + }, + { + "epoch": 0.6320784967613673, + "loss": 1.068593978881836, + "loss_ce": 0.0007228001486510038, + "loss_iou": 0.50390625, + "loss_num": 0.01190185546875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 311245380, + "step": 2464 + }, + { + "epoch": 0.6323350221253127, + "grad_norm": 26.004074096679688, + "learning_rate": 5e-06, + "loss": 0.9573, + "num_input_tokens_seen": 311371692, + "step": 2465 + }, + { + "epoch": 0.6323350221253127, + "loss": 1.0843027830123901, + "loss_ce": 0.0017832244047895074, + "loss_iou": 0.5, + "loss_num": 0.0167236328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 311371692, + "step": 2465 + }, + { + "epoch": 0.632591547489258, + "grad_norm": 40.31782150268555, + "learning_rate": 5e-06, + "loss": 1.0388, + "num_input_tokens_seen": 311497888, + "step": 2466 + }, + { + "epoch": 0.632591547489258, + "loss": 1.0369715690612793, + "loss_ce": 0.0008387021953240037, + "loss_iou": 0.486328125, + "loss_num": 0.01287841796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 311497888, + "step": 2466 + }, + { + "epoch": 0.6328480728532033, + "grad_norm": 43.556365966796875, + "learning_rate": 5e-06, + "loss": 1.111, + "num_input_tokens_seen": 311624316, + "step": 2467 + }, + { + "epoch": 0.6328480728532033, + "loss": 0.9514755010604858, + "loss_ce": 0.0003036805137526244, + "loss_iou": 0.447265625, + "loss_num": 0.01153564453125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 311624316, + "step": 2467 + }, + { + "epoch": 0.6331045982171487, + "grad_norm": 41.3142204284668, + "learning_rate": 5e-06, + "loss": 0.9938, + "num_input_tokens_seen": 311752044, + "step": 2468 + }, + { + "epoch": 0.6331045982171487, + "loss": 1.0382400751113892, + "loss_ce": 0.003083840012550354, + "loss_iou": 0.48046875, + "loss_num": 0.0152587890625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 311752044, + "step": 2468 + }, + { + "epoch": 0.6333611235810941, + "grad_norm": 29.828590393066406, + "learning_rate": 5e-06, + "loss": 1.0563, + "num_input_tokens_seen": 311877324, + "step": 2469 + }, + { + "epoch": 0.6333611235810941, + "loss": 1.034632921218872, + "loss_ce": 0.0004533426254056394, + "loss_iou": 0.4765625, + "loss_num": 0.015869140625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 311877324, + "step": 2469 + }, + { + "epoch": 0.6336176489450395, + "grad_norm": 22.581378936767578, + "learning_rate": 5e-06, + "loss": 0.8632, + "num_input_tokens_seen": 312004224, + "step": 2470 + }, + { + "epoch": 0.6336176489450395, + "loss": 0.8245887756347656, + "loss_ce": 0.0015907082706689835, + "loss_iou": 0.3828125, + "loss_num": 0.01129150390625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 312004224, + "step": 2470 + }, + { + "epoch": 0.6338741743089849, + "grad_norm": 57.88581466674805, + "learning_rate": 5e-06, + "loss": 1.0075, + "num_input_tokens_seen": 312131140, + "step": 2471 + }, + { + "epoch": 0.6338741743089849, + "loss": 1.022648811340332, + "loss_ce": 0.0009202745277434587, + "loss_iou": 0.46875, + "loss_num": 0.01708984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 312131140, + "step": 2471 + }, + { + "epoch": 0.6341306996729301, + "grad_norm": 46.05721664428711, + "learning_rate": 5e-06, + "loss": 1.0421, + "num_input_tokens_seen": 312258300, + "step": 2472 + }, + { + "epoch": 0.6341306996729301, + "loss": 0.9684139490127563, + "loss_ce": 0.0006405648309737444, + "loss_iou": 0.458984375, + "loss_num": 0.01031494140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 312258300, + "step": 2472 + }, + { + "epoch": 0.6343872250368755, + "grad_norm": 37.079105377197266, + "learning_rate": 5e-06, + "loss": 0.8652, + "num_input_tokens_seen": 312383840, + "step": 2473 + }, + { + "epoch": 0.6343872250368755, + "loss": 0.7709866762161255, + "loss_ce": 0.00047885527601465583, + "loss_iou": 0.369140625, + "loss_num": 0.006561279296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 312383840, + "step": 2473 + }, + { + "epoch": 0.6346437504008209, + "grad_norm": 34.36626052856445, + "learning_rate": 5e-06, + "loss": 1.0145, + "num_input_tokens_seen": 312510296, + "step": 2474 + }, + { + "epoch": 0.6346437504008209, + "loss": 1.0918447971343994, + "loss_ce": 4.7944708057912067e-05, + "loss_iou": 0.5, + "loss_num": 0.018310546875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 312510296, + "step": 2474 + }, + { + "epoch": 0.6349002757647663, + "grad_norm": 38.27058410644531, + "learning_rate": 5e-06, + "loss": 0.923, + "num_input_tokens_seen": 312636148, + "step": 2475 + }, + { + "epoch": 0.6349002757647663, + "loss": 0.9423344135284424, + "loss_ce": 0.0019047094974666834, + "loss_iou": 0.443359375, + "loss_num": 0.01068115234375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 312636148, + "step": 2475 + }, + { + "epoch": 0.6351568011287116, + "grad_norm": 48.85742950439453, + "learning_rate": 5e-06, + "loss": 1.1399, + "num_input_tokens_seen": 312763748, + "step": 2476 + }, + { + "epoch": 0.6351568011287116, + "loss": 1.1814463138580322, + "loss_ce": 0.0007821816252544522, + "loss_iou": 0.53125, + "loss_num": 0.0230712890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 312763748, + "step": 2476 + }, + { + "epoch": 0.6354133264926569, + "grad_norm": 58.78425979614258, + "learning_rate": 5e-06, + "loss": 1.0311, + "num_input_tokens_seen": 312889716, + "step": 2477 + }, + { + "epoch": 0.6354133264926569, + "loss": 1.067948818206787, + "loss_ce": 0.002030777046456933, + "loss_iou": 0.482421875, + "loss_num": 0.0201416015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 312889716, + "step": 2477 + }, + { + "epoch": 0.6356698518566023, + "grad_norm": 48.85798263549805, + "learning_rate": 5e-06, + "loss": 0.9953, + "num_input_tokens_seen": 313016780, + "step": 2478 + }, + { + "epoch": 0.6356698518566023, + "loss": 0.9967614412307739, + "loss_ce": 0.001644239411689341, + "loss_iou": 0.45703125, + "loss_num": 0.0166015625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 313016780, + "step": 2478 + }, + { + "epoch": 0.6359263772205477, + "grad_norm": 53.68509292602539, + "learning_rate": 5e-06, + "loss": 1.0086, + "num_input_tokens_seen": 313142452, + "step": 2479 + }, + { + "epoch": 0.6359263772205477, + "loss": 0.9821228981018066, + "loss_ce": 0.00043347227619960904, + "loss_iou": 0.455078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 313142452, + "step": 2479 + }, + { + "epoch": 0.6361829025844931, + "grad_norm": 50.022579193115234, + "learning_rate": 5e-06, + "loss": 1.1482, + "num_input_tokens_seen": 313269064, + "step": 2480 + }, + { + "epoch": 0.6361829025844931, + "loss": 1.2547531127929688, + "loss_ce": 0.00475321477279067, + "loss_iou": 0.56640625, + "loss_num": 0.0234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 313269064, + "step": 2480 + }, + { + "epoch": 0.6364394279484384, + "grad_norm": 39.800392150878906, + "learning_rate": 5e-06, + "loss": 0.8805, + "num_input_tokens_seen": 313394932, + "step": 2481 + }, + { + "epoch": 0.6364394279484384, + "loss": 0.8018169403076172, + "loss_ce": 0.002012226264923811, + "loss_iou": 0.37890625, + "loss_num": 0.008544921875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 313394932, + "step": 2481 + }, + { + "epoch": 0.6366959533123837, + "grad_norm": 43.395320892333984, + "learning_rate": 5e-06, + "loss": 0.9681, + "num_input_tokens_seen": 313520424, + "step": 2482 + }, + { + "epoch": 0.6366959533123837, + "loss": 0.8497616052627563, + "loss_ce": 0.0006405311869457364, + "loss_iou": 0.408203125, + "loss_num": 0.00665283203125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 313520424, + "step": 2482 + }, + { + "epoch": 0.6369524786763291, + "grad_norm": 45.77923583984375, + "learning_rate": 5e-06, + "loss": 1.0746, + "num_input_tokens_seen": 313647008, + "step": 2483 + }, + { + "epoch": 0.6369524786763291, + "loss": 0.9664580225944519, + "loss_ce": 0.0006377378012984991, + "loss_iou": 0.4453125, + "loss_num": 0.0150146484375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 313647008, + "step": 2483 + }, + { + "epoch": 0.6372090040402745, + "grad_norm": 60.24766540527344, + "learning_rate": 5e-06, + "loss": 0.9391, + "num_input_tokens_seen": 313774488, + "step": 2484 + }, + { + "epoch": 0.6372090040402745, + "loss": 0.8841963410377502, + "loss_ce": 0.0018721404485404491, + "loss_iou": 0.4140625, + "loss_num": 0.0107421875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 313774488, + "step": 2484 + }, + { + "epoch": 0.6374655294042199, + "grad_norm": 58.58847427368164, + "learning_rate": 5e-06, + "loss": 0.9444, + "num_input_tokens_seen": 313901560, + "step": 2485 + }, + { + "epoch": 0.6374655294042199, + "loss": 0.7853380441665649, + "loss_ce": 0.0001817881129682064, + "loss_iou": 0.373046875, + "loss_num": 0.00775146484375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 313901560, + "step": 2485 + }, + { + "epoch": 0.6377220547681652, + "grad_norm": 48.0351448059082, + "learning_rate": 5e-06, + "loss": 0.9756, + "num_input_tokens_seen": 314026672, + "step": 2486 + }, + { + "epoch": 0.6377220547681652, + "loss": 0.8512207269668579, + "loss_ce": 0.00014651667152065784, + "loss_iou": 0.400390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 314026672, + "step": 2486 + }, + { + "epoch": 0.6379785801321106, + "grad_norm": 56.102012634277344, + "learning_rate": 5e-06, + "loss": 1.0193, + "num_input_tokens_seen": 314153436, + "step": 2487 + }, + { + "epoch": 0.6379785801321106, + "loss": 0.9376398324966431, + "loss_ce": 0.003557785414159298, + "loss_iou": 0.419921875, + "loss_num": 0.018798828125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 314153436, + "step": 2487 + }, + { + "epoch": 0.6382351054960559, + "grad_norm": 60.45938491821289, + "learning_rate": 5e-06, + "loss": 1.0177, + "num_input_tokens_seen": 314279476, + "step": 2488 + }, + { + "epoch": 0.6382351054960559, + "loss": 0.9793537855148315, + "loss_ce": 0.0008381406078115106, + "loss_iou": 0.455078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 314279476, + "step": 2488 + }, + { + "epoch": 0.6384916308600013, + "grad_norm": 38.13401412963867, + "learning_rate": 5e-06, + "loss": 1.0787, + "num_input_tokens_seen": 314404940, + "step": 2489 + }, + { + "epoch": 0.6384916308600013, + "loss": 1.1006993055343628, + "loss_ce": 0.0010898895561695099, + "loss_iou": 0.5078125, + "loss_num": 0.0177001953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 314404940, + "step": 2489 + }, + { + "epoch": 0.6387481562239467, + "grad_norm": 48.75522994995117, + "learning_rate": 5e-06, + "loss": 1.0053, + "num_input_tokens_seen": 314531644, + "step": 2490 + }, + { + "epoch": 0.6387481562239467, + "loss": 0.8262951970100403, + "loss_ce": 0.00012335414066910744, + "loss_iou": 0.392578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 314531644, + "step": 2490 + }, + { + "epoch": 0.639004681587892, + "grad_norm": 57.63133239746094, + "learning_rate": 5e-06, + "loss": 0.8711, + "num_input_tokens_seen": 314658012, + "step": 2491 + }, + { + "epoch": 0.639004681587892, + "loss": 0.7972193360328674, + "loss_ce": 0.0030298929195851088, + "loss_iou": 0.37890625, + "loss_num": 0.007110595703125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 314658012, + "step": 2491 + }, + { + "epoch": 0.6392612069518374, + "grad_norm": 50.777530670166016, + "learning_rate": 5e-06, + "loss": 0.9881, + "num_input_tokens_seen": 314783984, + "step": 2492 + }, + { + "epoch": 0.6392612069518374, + "loss": 0.9365330934524536, + "loss_ce": 0.0019627877045422792, + "loss_iou": 0.435546875, + "loss_num": 0.01251220703125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 314783984, + "step": 2492 + }, + { + "epoch": 0.6395177323157827, + "grad_norm": 30.51044464111328, + "learning_rate": 5e-06, + "loss": 1.0218, + "num_input_tokens_seen": 314911264, + "step": 2493 + }, + { + "epoch": 0.6395177323157827, + "loss": 1.036022424697876, + "loss_ce": 0.0013544856337830424, + "loss_iou": 0.482421875, + "loss_num": 0.0137939453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 314911264, + "step": 2493 + }, + { + "epoch": 0.6397742576797281, + "grad_norm": 35.85038375854492, + "learning_rate": 5e-06, + "loss": 1.0062, + "num_input_tokens_seen": 315037508, + "step": 2494 + }, + { + "epoch": 0.6397742576797281, + "loss": 1.0716043710708618, + "loss_ce": 0.00031530956039205194, + "loss_iou": 0.49609375, + "loss_num": 0.0157470703125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 315037508, + "step": 2494 + }, + { + "epoch": 0.6400307830436734, + "grad_norm": 149.6553955078125, + "learning_rate": 5e-06, + "loss": 0.9438, + "num_input_tokens_seen": 315163584, + "step": 2495 + }, + { + "epoch": 0.6400307830436734, + "loss": 0.8392609357833862, + "loss_ce": 0.0021027191542088985, + "loss_iou": 0.388671875, + "loss_num": 0.01190185546875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 315163584, + "step": 2495 + }, + { + "epoch": 0.6402873084076188, + "grad_norm": 48.40391159057617, + "learning_rate": 5e-06, + "loss": 0.9182, + "num_input_tokens_seen": 315290880, + "step": 2496 + }, + { + "epoch": 0.6402873084076188, + "loss": 0.8231366872787476, + "loss_ce": 0.0013593407347798347, + "loss_iou": 0.38671875, + "loss_num": 0.00982666015625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 315290880, + "step": 2496 + }, + { + "epoch": 0.6405438337715642, + "grad_norm": 43.44719696044922, + "learning_rate": 5e-06, + "loss": 1.0423, + "num_input_tokens_seen": 315417168, + "step": 2497 + }, + { + "epoch": 0.6405438337715642, + "loss": 0.979767918586731, + "loss_ce": 0.004914391320198774, + "loss_iou": 0.443359375, + "loss_num": 0.017822265625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 315417168, + "step": 2497 + }, + { + "epoch": 0.6408003591355095, + "grad_norm": 47.518741607666016, + "learning_rate": 5e-06, + "loss": 0.9214, + "num_input_tokens_seen": 315543600, + "step": 2498 + }, + { + "epoch": 0.6408003591355095, + "loss": 0.9051362872123718, + "loss_ce": 0.000351129740010947, + "loss_iou": 0.421875, + "loss_num": 0.012451171875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 315543600, + "step": 2498 + }, + { + "epoch": 0.6410568844994549, + "grad_norm": 61.92799377441406, + "learning_rate": 5e-06, + "loss": 0.9488, + "num_input_tokens_seen": 315669776, + "step": 2499 + }, + { + "epoch": 0.6410568844994549, + "loss": 0.9454058408737183, + "loss_ce": 0.0010698674013838172, + "loss_iou": 0.44140625, + "loss_num": 0.0125732421875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 315669776, + "step": 2499 + }, + { + "epoch": 0.6413134098634002, + "grad_norm": 44.73543167114258, + "learning_rate": 5e-06, + "loss": 0.9597, + "num_input_tokens_seen": 315795756, + "step": 2500 + }, + { + "epoch": 0.6413134098634002, + "eval_icons_CIoU": 0.1894754022359848, + "eval_icons_GIoU": 0.14396759308874607, + "eval_icons_IoU": 0.38933469355106354, + "eval_icons_MAE_all": 0.033200185745954514, + "eval_icons_MAE_h": 0.05532575212419033, + "eval_icons_MAE_w": 0.05543721467256546, + "eval_icons_MAE_x_boxes": 0.05691156163811684, + "eval_icons_MAE_y_boxes": 0.05602333880960941, + "eval_icons_NUM_probability": 0.9996485114097595, + "eval_icons_inside_bbox": 0.640625, + "eval_icons_loss": 1.8144516944885254, + "eval_icons_loss_ce": 7.383912998193409e-05, + "eval_icons_loss_iou": 0.8223876953125, + "eval_icons_loss_num": 0.0358123779296875, + "eval_icons_loss_xval": 1.82421875, + "eval_icons_runtime": 39.3913, + "eval_icons_samples_per_second": 1.269, + "eval_icons_steps_per_second": 0.051, + "num_input_tokens_seen": 315795756, + "step": 2500 + }, + { + "epoch": 0.6413134098634002, + "eval_screenspot_CIoU": 0.11557815720637639, + "eval_screenspot_GIoU": 0.1087864339351654, + "eval_screenspot_IoU": 0.27850545446077984, + "eval_screenspot_MAE_all": 0.07068674514691035, + "eval_screenspot_MAE_h": 0.05356825515627861, + "eval_screenspot_MAE_w": 0.11678829540808995, + "eval_screenspot_MAE_x_boxes": 0.09224247187376022, + "eval_screenspot_MAE_y_boxes": 0.045117881149053574, + "eval_screenspot_NUM_probability": 0.9999234477678934, + "eval_screenspot_inside_bbox": 0.6225000023841858, + "eval_screenspot_loss": 2.180006504058838, + "eval_screenspot_loss_ce": 0.0014279020445731778, + "eval_screenspot_loss_iou": 0.9212239583333334, + "eval_screenspot_loss_num": 0.07674662272135417, + "eval_screenspot_loss_xval": 2.2259114583333335, + "eval_screenspot_runtime": 68.6871, + "eval_screenspot_samples_per_second": 1.296, + "eval_screenspot_steps_per_second": 0.044, + "num_input_tokens_seen": 315795756, + "step": 2500 + }, + { + "epoch": 0.6413134098634002, + "loss": 2.1584157943725586, + "loss_ce": 0.001189233735203743, + "loss_iou": 0.9140625, + "loss_num": 0.06640625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 315795756, + "step": 2500 + }, + { + "epoch": 0.6415699352273456, + "grad_norm": 23.59415054321289, + "learning_rate": 5e-06, + "loss": 0.93, + "num_input_tokens_seen": 315922888, + "step": 2501 + }, + { + "epoch": 0.6415699352273456, + "loss": 0.8966366052627563, + "loss_ce": 0.0006405095336958766, + "loss_iou": 0.404296875, + "loss_num": 0.017578125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 315922888, + "step": 2501 + }, + { + "epoch": 0.641826460591291, + "grad_norm": 22.13914680480957, + "learning_rate": 5e-06, + "loss": 0.9547, + "num_input_tokens_seen": 316048900, + "step": 2502 + }, + { + "epoch": 0.641826460591291, + "loss": 1.2089340686798096, + "loss_ce": 0.000926303444430232, + "loss_iou": 0.53125, + "loss_num": 0.0286865234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 316048900, + "step": 2502 + }, + { + "epoch": 0.6420829859552363, + "grad_norm": 22.006704330444336, + "learning_rate": 5e-06, + "loss": 0.9344, + "num_input_tokens_seen": 316173520, + "step": 2503 + }, + { + "epoch": 0.6420829859552363, + "loss": 1.0413873195648193, + "loss_ce": 0.0062311128713190556, + "loss_iou": 0.47265625, + "loss_num": 0.0181884765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 316173520, + "step": 2503 + }, + { + "epoch": 0.6423395113191817, + "grad_norm": 45.18537521362305, + "learning_rate": 5e-06, + "loss": 0.9591, + "num_input_tokens_seen": 316299780, + "step": 2504 + }, + { + "epoch": 0.6423395113191817, + "loss": 0.98471999168396, + "loss_ce": 0.0010774012189358473, + "loss_iou": 0.4609375, + "loss_num": 0.0118408203125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 316299780, + "step": 2504 + }, + { + "epoch": 0.642596036683127, + "grad_norm": 71.81124877929688, + "learning_rate": 5e-06, + "loss": 1.0914, + "num_input_tokens_seen": 316426540, + "step": 2505 + }, + { + "epoch": 0.642596036683127, + "loss": 1.1884512901306152, + "loss_ce": 0.0058341859839856625, + "loss_iou": 0.54296875, + "loss_num": 0.0191650390625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 316426540, + "step": 2505 + }, + { + "epoch": 0.6428525620470724, + "grad_norm": 53.26790237426758, + "learning_rate": 5e-06, + "loss": 1.0663, + "num_input_tokens_seen": 316553300, + "step": 2506 + }, + { + "epoch": 0.6428525620470724, + "loss": 1.0838321447372437, + "loss_ce": 0.006683701649308205, + "loss_iou": 0.515625, + "loss_num": 0.00921630859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 316553300, + "step": 2506 + }, + { + "epoch": 0.6431090874110178, + "grad_norm": 30.55912971496582, + "learning_rate": 5e-06, + "loss": 0.9813, + "num_input_tokens_seen": 316679356, + "step": 2507 + }, + { + "epoch": 0.6431090874110178, + "loss": 0.8189159631729126, + "loss_ce": 6.834132364019752e-05, + "loss_iou": 0.392578125, + "loss_num": 0.006866455078125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 316679356, + "step": 2507 + }, + { + "epoch": 0.6433656127749631, + "grad_norm": 38.27816390991211, + "learning_rate": 5e-06, + "loss": 0.9711, + "num_input_tokens_seen": 316805440, + "step": 2508 + }, + { + "epoch": 0.6433656127749631, + "loss": 1.0845415592193604, + "loss_ce": 0.003486872185021639, + "loss_iou": 0.51171875, + "loss_num": 0.011962890625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 316805440, + "step": 2508 + }, + { + "epoch": 0.6436221381389085, + "grad_norm": 62.62095642089844, + "learning_rate": 5e-06, + "loss": 0.9779, + "num_input_tokens_seen": 316931076, + "step": 2509 + }, + { + "epoch": 0.6436221381389085, + "loss": 0.989258885383606, + "loss_ce": 0.006836992222815752, + "loss_iou": 0.44921875, + "loss_num": 0.01708984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 316931076, + "step": 2509 + }, + { + "epoch": 0.6438786635028538, + "grad_norm": 47.067626953125, + "learning_rate": 5e-06, + "loss": 1.0797, + "num_input_tokens_seen": 317057464, + "step": 2510 + }, + { + "epoch": 0.6438786635028538, + "loss": 1.0920400619506836, + "loss_ce": 0.004637722857296467, + "loss_iou": 0.4921875, + "loss_num": 0.0208740234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 317057464, + "step": 2510 + }, + { + "epoch": 0.6441351888667992, + "grad_norm": 32.67469024658203, + "learning_rate": 5e-06, + "loss": 0.9837, + "num_input_tokens_seen": 317183812, + "step": 2511 + }, + { + "epoch": 0.6441351888667992, + "loss": 0.8611379861831665, + "loss_ce": 0.0027395517099648714, + "loss_iou": 0.40234375, + "loss_num": 0.01092529296875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 317183812, + "step": 2511 + }, + { + "epoch": 0.6443917142307446, + "grad_norm": 53.10581970214844, + "learning_rate": 5e-06, + "loss": 1.0376, + "num_input_tokens_seen": 317310148, + "step": 2512 + }, + { + "epoch": 0.6443917142307446, + "loss": 1.024294137954712, + "loss_ce": 0.0028096886817365885, + "loss_iou": 0.466796875, + "loss_num": 0.0174560546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 317310148, + "step": 2512 + }, + { + "epoch": 0.64464823959469, + "grad_norm": 76.71137237548828, + "learning_rate": 5e-06, + "loss": 1.0011, + "num_input_tokens_seen": 317436700, + "step": 2513 + }, + { + "epoch": 0.64464823959469, + "loss": 0.9319745302200317, + "loss_ce": 0.0013104160316288471, + "loss_iou": 0.44921875, + "loss_num": 0.0067138671875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 317436700, + "step": 2513 + }, + { + "epoch": 0.6449047649586352, + "grad_norm": 111.10372924804688, + "learning_rate": 5e-06, + "loss": 1.0588, + "num_input_tokens_seen": 317564536, + "step": 2514 + }, + { + "epoch": 0.6449047649586352, + "loss": 1.0919809341430664, + "loss_ce": 0.0006723300321027637, + "loss_iou": 0.50390625, + "loss_num": 0.016357421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 317564536, + "step": 2514 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 25.80034828186035, + "learning_rate": 5e-06, + "loss": 1.0167, + "num_input_tokens_seen": 317690216, + "step": 2515 + }, + { + "epoch": 0.6451612903225806, + "loss": 1.0635647773742676, + "loss_ce": 8.824904216453433e-05, + "loss_iou": 0.494140625, + "loss_num": 0.014892578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 317690216, + "step": 2515 + }, + { + "epoch": 0.645417815686526, + "grad_norm": 49.10334396362305, + "learning_rate": 5e-06, + "loss": 0.8933, + "num_input_tokens_seen": 317816888, + "step": 2516 + }, + { + "epoch": 0.645417815686526, + "loss": 0.8991395831108093, + "loss_ce": 0.001190381939522922, + "loss_iou": 0.41796875, + "loss_num": 0.01220703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 317816888, + "step": 2516 + }, + { + "epoch": 0.6456743410504714, + "grad_norm": 58.33970642089844, + "learning_rate": 5e-06, + "loss": 1.0141, + "num_input_tokens_seen": 317943704, + "step": 2517 + }, + { + "epoch": 0.6456743410504714, + "loss": 0.8808072805404663, + "loss_ce": 0.0004361860337667167, + "loss_iou": 0.40625, + "loss_num": 0.0135498046875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 317943704, + "step": 2517 + }, + { + "epoch": 0.6459308664144168, + "grad_norm": 46.87978744506836, + "learning_rate": 5e-06, + "loss": 0.9654, + "num_input_tokens_seen": 318069668, + "step": 2518 + }, + { + "epoch": 0.6459308664144168, + "loss": 0.9845964908599854, + "loss_ce": 0.00022149203869048506, + "loss_iou": 0.46484375, + "loss_num": 0.01080322265625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 318069668, + "step": 2518 + }, + { + "epoch": 0.646187391778362, + "grad_norm": 44.92280960083008, + "learning_rate": 5e-06, + "loss": 0.8749, + "num_input_tokens_seen": 318197216, + "step": 2519 + }, + { + "epoch": 0.646187391778362, + "loss": 0.7968791127204895, + "loss_ce": 0.0004923823289573193, + "loss_iou": 0.37890625, + "loss_num": 0.008056640625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 318197216, + "step": 2519 + }, + { + "epoch": 0.6464439171423074, + "grad_norm": 45.6769905090332, + "learning_rate": 5e-06, + "loss": 1.0586, + "num_input_tokens_seen": 318323712, + "step": 2520 + }, + { + "epoch": 0.6464439171423074, + "loss": 1.0454797744750977, + "loss_ce": 0.005440776236355305, + "loss_iou": 0.48828125, + "loss_num": 0.01275634765625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 318323712, + "step": 2520 + }, + { + "epoch": 0.6467004425062528, + "grad_norm": 47.695404052734375, + "learning_rate": 5e-06, + "loss": 0.997, + "num_input_tokens_seen": 318449708, + "step": 2521 + }, + { + "epoch": 0.6467004425062528, + "loss": 1.0668721199035645, + "loss_ce": 0.00486042769625783, + "loss_iou": 0.48828125, + "loss_num": 0.017333984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 318449708, + "step": 2521 + }, + { + "epoch": 0.6469569678701982, + "grad_norm": 42.857452392578125, + "learning_rate": 5e-06, + "loss": 0.9918, + "num_input_tokens_seen": 318575128, + "step": 2522 + }, + { + "epoch": 0.6469569678701982, + "loss": 1.0272332429885864, + "loss_ce": 0.0008660528110340238, + "loss_iou": 0.462890625, + "loss_num": 0.0201416015625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 318575128, + "step": 2522 + }, + { + "epoch": 0.6472134932341436, + "grad_norm": 36.89896011352539, + "learning_rate": 5e-06, + "loss": 0.9211, + "num_input_tokens_seen": 318700912, + "step": 2523 + }, + { + "epoch": 0.6472134932341436, + "loss": 0.9705300331115723, + "loss_ce": 0.0008034645579755306, + "loss_iou": 0.443359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 318700912, + "step": 2523 + }, + { + "epoch": 0.6474700185980888, + "grad_norm": 46.29090118408203, + "learning_rate": 5e-06, + "loss": 0.9753, + "num_input_tokens_seen": 318826324, + "step": 2524 + }, + { + "epoch": 0.6474700185980888, + "loss": 1.1603264808654785, + "loss_ce": 0.00017029396258294582, + "loss_iou": 0.52734375, + "loss_num": 0.0205078125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 318826324, + "step": 2524 + }, + { + "epoch": 0.6477265439620342, + "grad_norm": 50.32664489746094, + "learning_rate": 5e-06, + "loss": 1.0048, + "num_input_tokens_seen": 318952612, + "step": 2525 + }, + { + "epoch": 0.6477265439620342, + "loss": 1.0178900957107544, + "loss_ce": 0.004218225833028555, + "loss_iou": 0.47265625, + "loss_num": 0.013671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 318952612, + "step": 2525 + }, + { + "epoch": 0.6479830693259796, + "grad_norm": 64.36626434326172, + "learning_rate": 5e-06, + "loss": 1.0053, + "num_input_tokens_seen": 319079140, + "step": 2526 + }, + { + "epoch": 0.6479830693259796, + "loss": 1.03044855594635, + "loss_ce": 0.00017510858015157282, + "loss_iou": 0.484375, + "loss_num": 0.01214599609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 319079140, + "step": 2526 + }, + { + "epoch": 0.648239594689925, + "grad_norm": 63.44362258911133, + "learning_rate": 5e-06, + "loss": 1.0099, + "num_input_tokens_seen": 319206096, + "step": 2527 + }, + { + "epoch": 0.648239594689925, + "loss": 0.9730905890464783, + "loss_ce": 0.003852284513413906, + "loss_iou": 0.43359375, + "loss_num": 0.020751953125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 319206096, + "step": 2527 + }, + { + "epoch": 0.6484961200538704, + "grad_norm": 50.821598052978516, + "learning_rate": 5e-06, + "loss": 1.0448, + "num_input_tokens_seen": 319332544, + "step": 2528 + }, + { + "epoch": 0.6484961200538704, + "loss": 1.1387299299240112, + "loss_ce": 0.0005463565466925502, + "loss_iou": 0.51953125, + "loss_num": 0.0205078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 319332544, + "step": 2528 + }, + { + "epoch": 0.6487526454178156, + "grad_norm": 35.26225280761719, + "learning_rate": 5e-06, + "loss": 0.9292, + "num_input_tokens_seen": 319457704, + "step": 2529 + }, + { + "epoch": 0.6487526454178156, + "loss": 0.9400994777679443, + "loss_ce": 0.0006464060861617327, + "loss_iou": 0.431640625, + "loss_num": 0.01513671875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 319457704, + "step": 2529 + }, + { + "epoch": 0.649009170781761, + "grad_norm": 41.742889404296875, + "learning_rate": 5e-06, + "loss": 0.9143, + "num_input_tokens_seen": 319584008, + "step": 2530 + }, + { + "epoch": 0.649009170781761, + "loss": 0.9822044372558594, + "loss_ce": 0.0032005212269723415, + "loss_iou": 0.455078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 319584008, + "step": 2530 + }, + { + "epoch": 0.6492656961457064, + "grad_norm": 72.13563537597656, + "learning_rate": 5e-06, + "loss": 1.0865, + "num_input_tokens_seen": 319710184, + "step": 2531 + }, + { + "epoch": 0.6492656961457064, + "loss": 1.0516331195831299, + "loss_ce": 0.00011939967225771397, + "loss_iou": 0.474609375, + "loss_num": 0.0205078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 319710184, + "step": 2531 + }, + { + "epoch": 0.6495222215096518, + "grad_norm": 82.74921417236328, + "learning_rate": 5e-06, + "loss": 0.9771, + "num_input_tokens_seen": 319836612, + "step": 2532 + }, + { + "epoch": 0.6495222215096518, + "loss": 1.0619401931762695, + "loss_ce": 0.00041672700899653137, + "loss_iou": 0.482421875, + "loss_num": 0.0194091796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 319836612, + "step": 2532 + }, + { + "epoch": 0.6497787468735972, + "grad_norm": 54.184059143066406, + "learning_rate": 5e-06, + "loss": 1.0498, + "num_input_tokens_seen": 319963252, + "step": 2533 + }, + { + "epoch": 0.6497787468735972, + "loss": 1.1017423868179321, + "loss_ce": 0.0011564084561541677, + "loss_iou": 0.5, + "loss_num": 0.0194091796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 319963252, + "step": 2533 + }, + { + "epoch": 0.6500352722375425, + "grad_norm": 51.53678512573242, + "learning_rate": 5e-06, + "loss": 1.0902, + "num_input_tokens_seen": 320088852, + "step": 2534 + }, + { + "epoch": 0.6500352722375425, + "loss": 1.3015151023864746, + "loss_ce": 0.005128405522555113, + "loss_iou": 0.58203125, + "loss_num": 0.0263671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 320088852, + "step": 2534 + }, + { + "epoch": 0.6502917976014878, + "grad_norm": 42.27482223510742, + "learning_rate": 5e-06, + "loss": 0.9686, + "num_input_tokens_seen": 320214944, + "step": 2535 + }, + { + "epoch": 0.6502917976014878, + "loss": 1.0335527658462524, + "loss_ce": 0.0010820061434060335, + "loss_iou": 0.46484375, + "loss_num": 0.020751953125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 320214944, + "step": 2535 + }, + { + "epoch": 0.6505483229654332, + "grad_norm": 114.15023803710938, + "learning_rate": 5e-06, + "loss": 0.9784, + "num_input_tokens_seen": 320341852, + "step": 2536 + }, + { + "epoch": 0.6505483229654332, + "loss": 0.8794183731079102, + "loss_ce": 0.0017328441608697176, + "loss_iou": 0.40625, + "loss_num": 0.01263427734375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 320341852, + "step": 2536 + }, + { + "epoch": 0.6508048483293786, + "grad_norm": 47.009220123291016, + "learning_rate": 5e-06, + "loss": 1.0441, + "num_input_tokens_seen": 320467920, + "step": 2537 + }, + { + "epoch": 0.6508048483293786, + "loss": 1.2648087739944458, + "loss_ce": 0.00431071687489748, + "loss_iou": 0.53515625, + "loss_num": 0.0380859375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 320467920, + "step": 2537 + }, + { + "epoch": 0.651061373693324, + "grad_norm": 51.10209655761719, + "learning_rate": 5e-06, + "loss": 1.074, + "num_input_tokens_seen": 320593432, + "step": 2538 + }, + { + "epoch": 0.651061373693324, + "loss": 1.019327163696289, + "loss_ce": 0.0012607639655470848, + "loss_iou": 0.46484375, + "loss_num": 0.01806640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 320593432, + "step": 2538 + }, + { + "epoch": 0.6513178990572693, + "grad_norm": 69.9461898803711, + "learning_rate": 5e-06, + "loss": 1.1117, + "num_input_tokens_seen": 320720700, + "step": 2539 + }, + { + "epoch": 0.6513178990572693, + "loss": 1.1479278802871704, + "loss_ce": 0.00022277333482634276, + "loss_iou": 0.53125, + "loss_num": 0.016845703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 320720700, + "step": 2539 + }, + { + "epoch": 0.6515744244212146, + "grad_norm": 50.35648727416992, + "learning_rate": 5e-06, + "loss": 1.0686, + "num_input_tokens_seen": 320846244, + "step": 2540 + }, + { + "epoch": 0.6515744244212146, + "loss": 1.0237176418304443, + "loss_ce": 0.0002800257643684745, + "loss_iou": 0.4765625, + "loss_num": 0.0140380859375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 320846244, + "step": 2540 + }, + { + "epoch": 0.65183094978516, + "grad_norm": 22.305009841918945, + "learning_rate": 5e-06, + "loss": 0.8449, + "num_input_tokens_seen": 320971512, + "step": 2541 + }, + { + "epoch": 0.65183094978516, + "loss": 0.6406305432319641, + "loss_ce": 0.0004938304773531854, + "loss_iou": 0.306640625, + "loss_num": 0.0054931640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 320971512, + "step": 2541 + }, + { + "epoch": 0.6520874751491054, + "grad_norm": 27.07823944091797, + "learning_rate": 5e-06, + "loss": 1.0071, + "num_input_tokens_seen": 321097656, + "step": 2542 + }, + { + "epoch": 0.6520874751491054, + "loss": 1.2131917476654053, + "loss_ce": 0.0012776607181876898, + "loss_iou": 0.546875, + "loss_num": 0.0230712890625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 321097656, + "step": 2542 + }, + { + "epoch": 0.6523440005130507, + "grad_norm": 28.77030372619629, + "learning_rate": 5e-06, + "loss": 1.0164, + "num_input_tokens_seen": 321224284, + "step": 2543 + }, + { + "epoch": 0.6523440005130507, + "loss": 0.9516527056694031, + "loss_ce": 0.0009691096493043005, + "loss_iou": 0.44140625, + "loss_num": 0.0137939453125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 321224284, + "step": 2543 + }, + { + "epoch": 0.6526005258769961, + "grad_norm": 35.22871017456055, + "learning_rate": 5e-06, + "loss": 0.9432, + "num_input_tokens_seen": 321350976, + "step": 2544 + }, + { + "epoch": 0.6526005258769961, + "loss": 0.8475707769393921, + "loss_ce": 0.00479731522500515, + "loss_iou": 0.380859375, + "loss_num": 0.0166015625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 321350976, + "step": 2544 + }, + { + "epoch": 0.6528570512409414, + "grad_norm": 71.83428192138672, + "learning_rate": 5e-06, + "loss": 0.9466, + "num_input_tokens_seen": 321478208, + "step": 2545 + }, + { + "epoch": 0.6528570512409414, + "loss": 0.9686086177825928, + "loss_ce": 0.0013235016958788037, + "loss_iou": 0.455078125, + "loss_num": 0.0118408203125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 321478208, + "step": 2545 + }, + { + "epoch": 0.6531135766048868, + "grad_norm": 60.73153305053711, + "learning_rate": 5e-06, + "loss": 1.0345, + "num_input_tokens_seen": 321605096, + "step": 2546 + }, + { + "epoch": 0.6531135766048868, + "loss": 1.1399184465408325, + "loss_ce": 0.0007583254482597113, + "loss_iou": 0.51953125, + "loss_num": 0.0194091796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 321605096, + "step": 2546 + }, + { + "epoch": 0.6533701019688322, + "grad_norm": 59.767982482910156, + "learning_rate": 5e-06, + "loss": 1.0058, + "num_input_tokens_seen": 321731936, + "step": 2547 + }, + { + "epoch": 0.6533701019688322, + "loss": 0.9773869514465332, + "loss_ce": 0.001800987869501114, + "loss_iou": 0.4453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 321731936, + "step": 2547 + }, + { + "epoch": 0.6536266273327775, + "grad_norm": 51.63563537597656, + "learning_rate": 5e-06, + "loss": 0.9713, + "num_input_tokens_seen": 321858464, + "step": 2548 + }, + { + "epoch": 0.6536266273327775, + "loss": 0.9061453342437744, + "loss_ce": 0.0008718846365809441, + "loss_iou": 0.423828125, + "loss_num": 0.01129150390625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 321858464, + "step": 2548 + }, + { + "epoch": 0.6538831526967229, + "grad_norm": 36.578731536865234, + "learning_rate": 5e-06, + "loss": 0.9314, + "num_input_tokens_seen": 321984984, + "step": 2549 + }, + { + "epoch": 0.6538831526967229, + "loss": 0.7994917035102844, + "loss_ce": 0.002616706769913435, + "loss_iou": 0.359375, + "loss_num": 0.015380859375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 321984984, + "step": 2549 + }, + { + "epoch": 0.6541396780606682, + "grad_norm": 53.078128814697266, + "learning_rate": 5e-06, + "loss": 0.9987, + "num_input_tokens_seen": 322111476, + "step": 2550 + }, + { + "epoch": 0.6541396780606682, + "loss": 1.1092263460159302, + "loss_ce": 0.0013161643873900175, + "loss_iou": 0.5078125, + "loss_num": 0.0181884765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 322111476, + "step": 2550 + }, + { + "epoch": 0.6543962034246136, + "grad_norm": 61.14692306518555, + "learning_rate": 5e-06, + "loss": 0.9398, + "num_input_tokens_seen": 322238732, + "step": 2551 + }, + { + "epoch": 0.6543962034246136, + "loss": 0.8002679347991943, + "loss_ce": 0.0011956471716985106, + "loss_iou": 0.376953125, + "loss_num": 0.009521484375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 322238732, + "step": 2551 + }, + { + "epoch": 0.654652728788559, + "grad_norm": 46.45836639404297, + "learning_rate": 5e-06, + "loss": 0.9514, + "num_input_tokens_seen": 322363184, + "step": 2552 + }, + { + "epoch": 0.654652728788559, + "loss": 0.9463776350021362, + "loss_ce": 0.0010651350021362305, + "loss_iou": 0.4375, + "loss_num": 0.0142822265625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 322363184, + "step": 2552 + }, + { + "epoch": 0.6549092541525043, + "grad_norm": 39.87617111206055, + "learning_rate": 5e-06, + "loss": 0.934, + "num_input_tokens_seen": 322490044, + "step": 2553 + }, + { + "epoch": 0.6549092541525043, + "loss": 0.9073050022125244, + "loss_ce": 0.0010549655416980386, + "loss_iou": 0.43359375, + "loss_num": 0.0078125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 322490044, + "step": 2553 + }, + { + "epoch": 0.6551657795164497, + "grad_norm": 47.76124572753906, + "learning_rate": 5e-06, + "loss": 1.0452, + "num_input_tokens_seen": 322616152, + "step": 2554 + }, + { + "epoch": 0.6551657795164497, + "loss": 1.3401833772659302, + "loss_ce": 0.0042458572424948215, + "loss_iou": 0.6015625, + "loss_num": 0.026123046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 322616152, + "step": 2554 + }, + { + "epoch": 0.6554223048803951, + "grad_norm": 69.3962631225586, + "learning_rate": 5e-06, + "loss": 0.8953, + "num_input_tokens_seen": 322742764, + "step": 2555 + }, + { + "epoch": 0.6554223048803951, + "loss": 0.8622629642486572, + "loss_ce": 0.00093482417287305, + "loss_iou": 0.3984375, + "loss_num": 0.012939453125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 322742764, + "step": 2555 + }, + { + "epoch": 0.6556788302443404, + "grad_norm": 48.42595291137695, + "learning_rate": 5e-06, + "loss": 0.9537, + "num_input_tokens_seen": 322869668, + "step": 2556 + }, + { + "epoch": 0.6556788302443404, + "loss": 0.9341844320297241, + "loss_ce": 0.00010241439304081723, + "loss_iou": 0.447265625, + "loss_num": 0.00799560546875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 322869668, + "step": 2556 + }, + { + "epoch": 0.6559353556082858, + "grad_norm": 133.43043518066406, + "learning_rate": 5e-06, + "loss": 1.0679, + "num_input_tokens_seen": 322996704, + "step": 2557 + }, + { + "epoch": 0.6559353556082858, + "loss": 1.2168223857879639, + "loss_ce": 0.002466895617544651, + "loss_iou": 0.5546875, + "loss_num": 0.020263671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 322996704, + "step": 2557 + }, + { + "epoch": 0.6561918809722311, + "grad_norm": 56.74580383300781, + "learning_rate": 5e-06, + "loss": 1.0205, + "num_input_tokens_seen": 323122892, + "step": 2558 + }, + { + "epoch": 0.6561918809722311, + "loss": 1.127679467201233, + "loss_ce": 0.00023803164367564023, + "loss_iou": 0.5234375, + "loss_num": 0.0164794921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 323122892, + "step": 2558 + }, + { + "epoch": 0.6564484063361765, + "grad_norm": 53.315433502197266, + "learning_rate": 5e-06, + "loss": 1.0803, + "num_input_tokens_seen": 323249196, + "step": 2559 + }, + { + "epoch": 0.6564484063361765, + "loss": 1.070936918258667, + "loss_ce": 0.0011127182515338063, + "loss_iou": 0.5, + "loss_num": 0.013916015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 323249196, + "step": 2559 + }, + { + "epoch": 0.6567049317001219, + "grad_norm": 52.62623977661133, + "learning_rate": 5e-06, + "loss": 0.8037, + "num_input_tokens_seen": 323375368, + "step": 2560 + }, + { + "epoch": 0.6567049317001219, + "loss": 0.8856045603752136, + "loss_ce": 0.00010653733625076711, + "loss_iou": 0.408203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 323375368, + "step": 2560 + }, + { + "epoch": 0.6569614570640672, + "grad_norm": 140.98773193359375, + "learning_rate": 5e-06, + "loss": 0.9971, + "num_input_tokens_seen": 323500776, + "step": 2561 + }, + { + "epoch": 0.6569614570640672, + "loss": 1.021779179573059, + "loss_ce": 0.003224445739760995, + "loss_iou": 0.4765625, + "loss_num": 0.0130615234375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 323500776, + "step": 2561 + }, + { + "epoch": 0.6572179824280125, + "grad_norm": 60.800437927246094, + "learning_rate": 5e-06, + "loss": 0.843, + "num_input_tokens_seen": 323627980, + "step": 2562 + }, + { + "epoch": 0.6572179824280125, + "loss": 1.0431504249572754, + "loss_ce": 0.002379023004323244, + "loss_iou": 0.48046875, + "loss_num": 0.015869140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 323627980, + "step": 2562 + }, + { + "epoch": 0.6574745077919579, + "grad_norm": 48.72665023803711, + "learning_rate": 5e-06, + "loss": 1.0235, + "num_input_tokens_seen": 323753764, + "step": 2563 + }, + { + "epoch": 0.6574745077919579, + "loss": 1.0337119102478027, + "loss_ce": 0.0019735852256417274, + "loss_iou": 0.4765625, + "loss_num": 0.0157470703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 323753764, + "step": 2563 + }, + { + "epoch": 0.6577310331559033, + "grad_norm": 38.067832946777344, + "learning_rate": 5e-06, + "loss": 0.9222, + "num_input_tokens_seen": 323879928, + "step": 2564 + }, + { + "epoch": 0.6577310331559033, + "loss": 1.0303575992584229, + "loss_ce": 0.002525636926293373, + "loss_iou": 0.46484375, + "loss_num": 0.01953125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 323879928, + "step": 2564 + }, + { + "epoch": 0.6579875585198487, + "grad_norm": 80.7061767578125, + "learning_rate": 5e-06, + "loss": 0.9469, + "num_input_tokens_seen": 324005148, + "step": 2565 + }, + { + "epoch": 0.6579875585198487, + "loss": 0.952646791934967, + "loss_ce": 0.0002542136353440583, + "loss_iou": 0.443359375, + "loss_num": 0.01348876953125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 324005148, + "step": 2565 + }, + { + "epoch": 0.658244083883794, + "grad_norm": 26.289743423461914, + "learning_rate": 5e-06, + "loss": 1.0903, + "num_input_tokens_seen": 324133056, + "step": 2566 + }, + { + "epoch": 0.658244083883794, + "loss": 1.0004773139953613, + "loss_ce": 0.0034068753011524677, + "loss_iou": 0.447265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 324133056, + "step": 2566 + }, + { + "epoch": 0.6585006092477393, + "grad_norm": 37.30583190917969, + "learning_rate": 5e-06, + "loss": 0.9683, + "num_input_tokens_seen": 324259220, + "step": 2567 + }, + { + "epoch": 0.6585006092477393, + "loss": 1.1844403743743896, + "loss_ce": 0.002555547747761011, + "loss_iou": 0.53125, + "loss_num": 0.0238037109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 324259220, + "step": 2567 + }, + { + "epoch": 0.6587571346116847, + "grad_norm": 52.61225509643555, + "learning_rate": 5e-06, + "loss": 0.9213, + "num_input_tokens_seen": 324384648, + "step": 2568 + }, + { + "epoch": 0.6587571346116847, + "loss": 0.7872378826141357, + "loss_ce": 0.0037906006909906864, + "loss_iou": 0.365234375, + "loss_num": 0.01055908203125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 324384648, + "step": 2568 + }, + { + "epoch": 0.6590136599756301, + "grad_norm": 36.730262756347656, + "learning_rate": 5e-06, + "loss": 0.9374, + "num_input_tokens_seen": 324509764, + "step": 2569 + }, + { + "epoch": 0.6590136599756301, + "loss": 0.8289806842803955, + "loss_ce": 0.0003673451137728989, + "loss_iou": 0.392578125, + "loss_num": 0.0084228515625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 324509764, + "step": 2569 + }, + { + "epoch": 0.6592701853395755, + "grad_norm": 20.297969818115234, + "learning_rate": 5e-06, + "loss": 0.9325, + "num_input_tokens_seen": 324636200, + "step": 2570 + }, + { + "epoch": 0.6592701853395755, + "loss": 0.8867063522338867, + "loss_ce": 0.0009641729993745685, + "loss_iou": 0.416015625, + "loss_num": 0.010986328125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 324636200, + "step": 2570 + }, + { + "epoch": 0.6595267107035208, + "grad_norm": 28.509300231933594, + "learning_rate": 5e-06, + "loss": 1.0163, + "num_input_tokens_seen": 324763080, + "step": 2571 + }, + { + "epoch": 0.6595267107035208, + "loss": 1.1916611194610596, + "loss_ce": 0.000743173121009022, + "loss_iou": 0.53515625, + "loss_num": 0.024169921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 324763080, + "step": 2571 + }, + { + "epoch": 0.6597832360674661, + "grad_norm": 46.672637939453125, + "learning_rate": 5e-06, + "loss": 0.9832, + "num_input_tokens_seen": 324889748, + "step": 2572 + }, + { + "epoch": 0.6597832360674661, + "loss": 1.0805200338363647, + "loss_ce": 0.00044189533218741417, + "loss_iou": 0.490234375, + "loss_num": 0.020263671875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 324889748, + "step": 2572 + }, + { + "epoch": 0.6600397614314115, + "grad_norm": 44.58865737915039, + "learning_rate": 5e-06, + "loss": 1.0301, + "num_input_tokens_seen": 325017076, + "step": 2573 + }, + { + "epoch": 0.6600397614314115, + "loss": 0.9069502949714661, + "loss_ce": 0.0002119986602338031, + "loss_iou": 0.4296875, + "loss_num": 0.00927734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 325017076, + "step": 2573 + }, + { + "epoch": 0.6602962867953569, + "grad_norm": 47.87212371826172, + "learning_rate": 5e-06, + "loss": 0.9088, + "num_input_tokens_seen": 325143588, + "step": 2574 + }, + { + "epoch": 0.6602962867953569, + "loss": 0.8963819742202759, + "loss_ce": 0.0008741815690882504, + "loss_iou": 0.421875, + "loss_num": 0.010498046875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 325143588, + "step": 2574 + }, + { + "epoch": 0.6605528121593023, + "grad_norm": 78.64407348632812, + "learning_rate": 5e-06, + "loss": 0.9949, + "num_input_tokens_seen": 325268836, + "step": 2575 + }, + { + "epoch": 0.6605528121593023, + "loss": 1.231353998184204, + "loss_ce": 0.0018617615569382906, + "loss_iou": 0.5390625, + "loss_num": 0.02978515625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 325268836, + "step": 2575 + }, + { + "epoch": 0.6608093375232477, + "grad_norm": 62.79936599731445, + "learning_rate": 5e-06, + "loss": 0.9037, + "num_input_tokens_seen": 325394712, + "step": 2576 + }, + { + "epoch": 0.6608093375232477, + "loss": 0.9758100509643555, + "loss_ce": 0.0014448176370933652, + "loss_iou": 0.4375, + "loss_num": 0.02001953125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 325394712, + "step": 2576 + }, + { + "epoch": 0.6610658628871929, + "grad_norm": 57.34217834472656, + "learning_rate": 5e-06, + "loss": 0.9979, + "num_input_tokens_seen": 325521336, + "step": 2577 + }, + { + "epoch": 0.6610658628871929, + "loss": 0.9856201410293579, + "loss_ce": 0.00026858298224397004, + "loss_iou": 0.4609375, + "loss_num": 0.0128173828125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 325521336, + "step": 2577 + }, + { + "epoch": 0.6613223882511383, + "grad_norm": 40.77871322631836, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 325648140, + "step": 2578 + }, + { + "epoch": 0.6613223882511383, + "loss": 0.7677271366119385, + "loss_ce": 0.00014898774679750204, + "loss_iou": 0.365234375, + "loss_num": 0.007568359375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 325648140, + "step": 2578 + }, + { + "epoch": 0.6615789136150837, + "grad_norm": 56.08229064941406, + "learning_rate": 5e-06, + "loss": 0.9773, + "num_input_tokens_seen": 325774620, + "step": 2579 + }, + { + "epoch": 0.6615789136150837, + "loss": 0.8560376167297363, + "loss_ce": 0.0008130334899760783, + "loss_iou": 0.404296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 325774620, + "step": 2579 + }, + { + "epoch": 0.6618354389790291, + "grad_norm": 51.157928466796875, + "learning_rate": 5e-06, + "loss": 1.021, + "num_input_tokens_seen": 325901264, + "step": 2580 + }, + { + "epoch": 0.6618354389790291, + "loss": 0.8419246077537537, + "loss_ce": 0.0006160178454592824, + "loss_iou": 0.39453125, + "loss_num": 0.010498046875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 325901264, + "step": 2580 + }, + { + "epoch": 0.6620919643429745, + "grad_norm": 50.69013977050781, + "learning_rate": 5e-06, + "loss": 0.9693, + "num_input_tokens_seen": 326027708, + "step": 2581 + }, + { + "epoch": 0.6620919643429745, + "loss": 1.0594018697738647, + "loss_ce": 0.004226113203912973, + "loss_iou": 0.49609375, + "loss_num": 0.012939453125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 326027708, + "step": 2581 + }, + { + "epoch": 0.6623484897069197, + "grad_norm": 121.15338134765625, + "learning_rate": 5e-06, + "loss": 0.9881, + "num_input_tokens_seen": 326154376, + "step": 2582 + }, + { + "epoch": 0.6623484897069197, + "loss": 1.1084377765655518, + "loss_ce": 0.001015805988572538, + "loss_iou": 0.50390625, + "loss_num": 0.0203857421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 326154376, + "step": 2582 + }, + { + "epoch": 0.6626050150708651, + "grad_norm": 61.08228302001953, + "learning_rate": 5e-06, + "loss": 1.0139, + "num_input_tokens_seen": 326280432, + "step": 2583 + }, + { + "epoch": 0.6626050150708651, + "loss": 0.8925979733467102, + "loss_ce": 0.0005081618437543511, + "loss_iou": 0.4140625, + "loss_num": 0.01251220703125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 326280432, + "step": 2583 + }, + { + "epoch": 0.6628615404348105, + "grad_norm": 35.20524978637695, + "learning_rate": 5e-06, + "loss": 0.9465, + "num_input_tokens_seen": 326406812, + "step": 2584 + }, + { + "epoch": 0.6628615404348105, + "loss": 1.0542138814926147, + "loss_ce": 0.0014795222086831927, + "loss_iou": 0.4765625, + "loss_num": 0.020263671875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 326406812, + "step": 2584 + }, + { + "epoch": 0.6631180657987559, + "grad_norm": 50.86079025268555, + "learning_rate": 5e-06, + "loss": 0.9724, + "num_input_tokens_seen": 326533680, + "step": 2585 + }, + { + "epoch": 0.6631180657987559, + "loss": 0.8301674127578735, + "loss_ce": 8.923219866119325e-05, + "loss_iou": 0.390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 326533680, + "step": 2585 + }, + { + "epoch": 0.6633745911627013, + "grad_norm": 68.34303283691406, + "learning_rate": 5e-06, + "loss": 0.933, + "num_input_tokens_seen": 326660376, + "step": 2586 + }, + { + "epoch": 0.6633745911627013, + "loss": 0.867810845375061, + "loss_ce": 0.00013504312664736062, + "loss_iou": 0.412109375, + "loss_num": 0.00897216796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 326660376, + "step": 2586 + }, + { + "epoch": 0.6636311165266465, + "grad_norm": 49.95848083496094, + "learning_rate": 5e-06, + "loss": 0.9739, + "num_input_tokens_seen": 326786964, + "step": 2587 + }, + { + "epoch": 0.6636311165266465, + "loss": 0.9336749315261841, + "loss_ce": 0.0010577525245025754, + "loss_iou": 0.435546875, + "loss_num": 0.012451171875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 326786964, + "step": 2587 + }, + { + "epoch": 0.6638876418905919, + "grad_norm": 37.53621292114258, + "learning_rate": 5e-06, + "loss": 0.9877, + "num_input_tokens_seen": 326912772, + "step": 2588 + }, + { + "epoch": 0.6638876418905919, + "loss": 0.9066030383110046, + "loss_ce": 0.0003530262620188296, + "loss_iou": 0.4375, + "loss_num": 0.006622314453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 326912772, + "step": 2588 + }, + { + "epoch": 0.6641441672545373, + "grad_norm": 29.56305694580078, + "learning_rate": 5e-06, + "loss": 0.9645, + "num_input_tokens_seen": 327039104, + "step": 2589 + }, + { + "epoch": 0.6641441672545373, + "loss": 0.9219417572021484, + "loss_ce": 0.0005550056230276823, + "loss_iou": 0.421875, + "loss_num": 0.015380859375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 327039104, + "step": 2589 + }, + { + "epoch": 0.6644006926184827, + "grad_norm": 45.28819274902344, + "learning_rate": 5e-06, + "loss": 1.0259, + "num_input_tokens_seen": 327164716, + "step": 2590 + }, + { + "epoch": 0.6644006926184827, + "loss": 1.0519031286239624, + "loss_ce": 0.002098442055284977, + "loss_iou": 0.48046875, + "loss_num": 0.0174560546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 327164716, + "step": 2590 + }, + { + "epoch": 0.664657217982428, + "grad_norm": 48.15447235107422, + "learning_rate": 5e-06, + "loss": 0.9972, + "num_input_tokens_seen": 327290484, + "step": 2591 + }, + { + "epoch": 0.664657217982428, + "loss": 0.9134647250175476, + "loss_ce": 0.0003788552130572498, + "loss_iou": 0.435546875, + "loss_num": 0.0087890625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 327290484, + "step": 2591 + }, + { + "epoch": 0.6649137433463733, + "grad_norm": 54.98477554321289, + "learning_rate": 5e-06, + "loss": 0.9299, + "num_input_tokens_seen": 327416936, + "step": 2592 + }, + { + "epoch": 0.6649137433463733, + "loss": 0.8780367970466614, + "loss_ce": 0.0003512462426442653, + "loss_iou": 0.41015625, + "loss_num": 0.01141357421875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 327416936, + "step": 2592 + }, + { + "epoch": 0.6651702687103187, + "grad_norm": 58.28336715698242, + "learning_rate": 5e-06, + "loss": 1.0213, + "num_input_tokens_seen": 327543848, + "step": 2593 + }, + { + "epoch": 0.6651702687103187, + "loss": 1.259503722190857, + "loss_ce": 0.0002263898350065574, + "loss_iou": 0.57421875, + "loss_num": 0.0224609375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 327543848, + "step": 2593 + }, + { + "epoch": 0.6654267940742641, + "grad_norm": 60.74911117553711, + "learning_rate": 5e-06, + "loss": 0.9964, + "num_input_tokens_seen": 327670360, + "step": 2594 + }, + { + "epoch": 0.6654267940742641, + "loss": 0.8540535569190979, + "loss_ce": 0.0005379447829909623, + "loss_iou": 0.40625, + "loss_num": 0.0078125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 327670360, + "step": 2594 + }, + { + "epoch": 0.6656833194382095, + "grad_norm": 44.802677154541016, + "learning_rate": 5e-06, + "loss": 0.9963, + "num_input_tokens_seen": 327796636, + "step": 2595 + }, + { + "epoch": 0.6656833194382095, + "loss": 1.0499944686889648, + "loss_ce": 0.0011662642937153578, + "loss_iou": 0.48046875, + "loss_num": 0.017822265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 327796636, + "step": 2595 + }, + { + "epoch": 0.6659398448021548, + "grad_norm": 43.667240142822266, + "learning_rate": 5e-06, + "loss": 0.9097, + "num_input_tokens_seen": 327921956, + "step": 2596 + }, + { + "epoch": 0.6659398448021548, + "loss": 1.2363719940185547, + "loss_ce": 0.00492672435939312, + "loss_iou": 0.53515625, + "loss_num": 0.032470703125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 327921956, + "step": 2596 + }, + { + "epoch": 0.6661963701661002, + "grad_norm": 49.90669631958008, + "learning_rate": 5e-06, + "loss": 1.0292, + "num_input_tokens_seen": 328048468, + "step": 2597 + }, + { + "epoch": 0.6661963701661002, + "loss": 0.9330325722694397, + "loss_ce": 0.00041541692917235196, + "loss_iou": 0.44140625, + "loss_num": 0.00970458984375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 328048468, + "step": 2597 + }, + { + "epoch": 0.6664528955300455, + "grad_norm": 54.853179931640625, + "learning_rate": 5e-06, + "loss": 1.0659, + "num_input_tokens_seen": 328173756, + "step": 2598 + }, + { + "epoch": 0.6664528955300455, + "loss": 1.071955680847168, + "loss_ce": 0.0026196991093456745, + "loss_iou": 0.5, + "loss_num": 0.01312255859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 328173756, + "step": 2598 + }, + { + "epoch": 0.6667094208939909, + "grad_norm": 48.6937255859375, + "learning_rate": 5e-06, + "loss": 1.0304, + "num_input_tokens_seen": 328300296, + "step": 2599 + }, + { + "epoch": 0.6667094208939909, + "loss": 1.1512596607208252, + "loss_ce": 0.0008689466631039977, + "loss_iou": 0.5234375, + "loss_num": 0.0198974609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 328300296, + "step": 2599 + }, + { + "epoch": 0.6669659462579363, + "grad_norm": 49.37694549560547, + "learning_rate": 5e-06, + "loss": 0.8213, + "num_input_tokens_seen": 328425796, + "step": 2600 + }, + { + "epoch": 0.6669659462579363, + "loss": 0.8191945552825928, + "loss_ce": 0.0005910039180889726, + "loss_iou": 0.384765625, + "loss_num": 0.009765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 328425796, + "step": 2600 + }, + { + "epoch": 0.6672224716218816, + "grad_norm": 62.02981185913086, + "learning_rate": 5e-06, + "loss": 1.0209, + "num_input_tokens_seen": 328551904, + "step": 2601 + }, + { + "epoch": 0.6672224716218816, + "loss": 1.0754212141036987, + "loss_ce": 0.00022586580598726869, + "loss_iou": 0.5, + "loss_num": 0.01483154296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 328551904, + "step": 2601 + }, + { + "epoch": 0.667478996985827, + "grad_norm": 36.73429870605469, + "learning_rate": 5e-06, + "loss": 0.9028, + "num_input_tokens_seen": 328678148, + "step": 2602 + }, + { + "epoch": 0.667478996985827, + "loss": 0.8765177130699158, + "loss_ce": 0.0034708520397543907, + "loss_iou": 0.416015625, + "loss_num": 0.00830078125, + "loss_xval": 0.875, + "num_input_tokens_seen": 328678148, + "step": 2602 + }, + { + "epoch": 0.6677355223497723, + "grad_norm": 43.596805572509766, + "learning_rate": 5e-06, + "loss": 0.9298, + "num_input_tokens_seen": 328803636, + "step": 2603 + }, + { + "epoch": 0.6677355223497723, + "loss": 0.822252631187439, + "loss_ce": 0.0004752951208502054, + "loss_iou": 0.390625, + "loss_num": 0.00836181640625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 328803636, + "step": 2603 + }, + { + "epoch": 0.6679920477137177, + "grad_norm": 58.305667877197266, + "learning_rate": 5e-06, + "loss": 0.829, + "num_input_tokens_seen": 328928916, + "step": 2604 + }, + { + "epoch": 0.6679920477137177, + "loss": 0.7055681347846985, + "loss_ce": 0.000489999249111861, + "loss_iou": 0.33203125, + "loss_num": 0.008056640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 328928916, + "step": 2604 + }, + { + "epoch": 0.668248573077663, + "grad_norm": 41.25015640258789, + "learning_rate": 5e-06, + "loss": 0.977, + "num_input_tokens_seen": 329054296, + "step": 2605 + }, + { + "epoch": 0.668248573077663, + "loss": 0.8563076257705688, + "loss_ce": 0.00035060258232988417, + "loss_iou": 0.412109375, + "loss_num": 0.006683349609375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 329054296, + "step": 2605 + }, + { + "epoch": 0.6685050984416084, + "grad_norm": 17.951494216918945, + "learning_rate": 5e-06, + "loss": 0.8571, + "num_input_tokens_seen": 329181108, + "step": 2606 + }, + { + "epoch": 0.6685050984416084, + "loss": 0.9497910141944885, + "loss_ce": 0.0015487968921661377, + "loss_iou": 0.447265625, + "loss_num": 0.0103759765625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 329181108, + "step": 2606 + }, + { + "epoch": 0.6687616238055538, + "grad_norm": 28.445119857788086, + "learning_rate": 5e-06, + "loss": 0.8681, + "num_input_tokens_seen": 329307088, + "step": 2607 + }, + { + "epoch": 0.6687616238055538, + "loss": 0.6227301359176636, + "loss_ce": 0.00017157517140731215, + "loss_iou": 0.296875, + "loss_num": 0.005523681640625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 329307088, + "step": 2607 + }, + { + "epoch": 0.6690181491694991, + "grad_norm": 60.301429748535156, + "learning_rate": 5e-06, + "loss": 0.9719, + "num_input_tokens_seen": 329433844, + "step": 2608 + }, + { + "epoch": 0.6690181491694991, + "loss": 0.8493772745132446, + "loss_ce": 0.0005003456026315689, + "loss_iou": 0.400390625, + "loss_num": 0.0093994140625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 329433844, + "step": 2608 + }, + { + "epoch": 0.6692746745334445, + "grad_norm": 64.60432434082031, + "learning_rate": 5e-06, + "loss": 1.0147, + "num_input_tokens_seen": 329560060, + "step": 2609 + }, + { + "epoch": 0.6692746745334445, + "loss": 0.8359988927841187, + "loss_ce": 0.0010379781015217304, + "loss_iou": 0.384765625, + "loss_num": 0.01287841796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 329560060, + "step": 2609 + }, + { + "epoch": 0.6695311998973899, + "grad_norm": 48.75896072387695, + "learning_rate": 5e-06, + "loss": 0.8759, + "num_input_tokens_seen": 329686476, + "step": 2610 + }, + { + "epoch": 0.6695311998973899, + "loss": 0.8272218704223633, + "loss_ce": 7.33964770915918e-05, + "loss_iou": 0.3828125, + "loss_num": 0.01190185546875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 329686476, + "step": 2610 + }, + { + "epoch": 0.6697877252613352, + "grad_norm": 39.09230041503906, + "learning_rate": 5e-06, + "loss": 0.9767, + "num_input_tokens_seen": 329813240, + "step": 2611 + }, + { + "epoch": 0.6697877252613352, + "loss": 1.2717763185501099, + "loss_ce": 0.0017568380571901798, + "loss_iou": 0.5625, + "loss_num": 0.0296630859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 329813240, + "step": 2611 + }, + { + "epoch": 0.6700442506252806, + "grad_norm": 53.445068359375, + "learning_rate": 5e-06, + "loss": 1.0295, + "num_input_tokens_seen": 329940128, + "step": 2612 + }, + { + "epoch": 0.6700442506252806, + "loss": 0.9662352800369263, + "loss_ce": 0.00041496381163597107, + "loss_iou": 0.451171875, + "loss_num": 0.0125732421875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 329940128, + "step": 2612 + }, + { + "epoch": 0.6703007759892259, + "grad_norm": 59.1682243347168, + "learning_rate": 5e-06, + "loss": 0.9191, + "num_input_tokens_seen": 330066240, + "step": 2613 + }, + { + "epoch": 0.6703007759892259, + "loss": 1.0424437522888184, + "loss_ce": 0.0024047736078500748, + "loss_iou": 0.474609375, + "loss_num": 0.0181884765625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 330066240, + "step": 2613 + }, + { + "epoch": 0.6705573013531713, + "grad_norm": 53.199684143066406, + "learning_rate": 5e-06, + "loss": 1.0157, + "num_input_tokens_seen": 330192580, + "step": 2614 + }, + { + "epoch": 0.6705573013531713, + "loss": 0.9505829215049744, + "loss_ce": 0.00038759096059948206, + "loss_iou": 0.443359375, + "loss_num": 0.0126953125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 330192580, + "step": 2614 + }, + { + "epoch": 0.6708138267171166, + "grad_norm": 54.543704986572266, + "learning_rate": 5e-06, + "loss": 0.8584, + "num_input_tokens_seen": 330319744, + "step": 2615 + }, + { + "epoch": 0.6708138267171166, + "loss": 0.9499923586845398, + "loss_ce": 0.0007736086845397949, + "loss_iou": 0.44921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 330319744, + "step": 2615 + }, + { + "epoch": 0.671070352081062, + "grad_norm": 50.70442581176758, + "learning_rate": 5e-06, + "loss": 0.9683, + "num_input_tokens_seen": 330446004, + "step": 2616 + }, + { + "epoch": 0.671070352081062, + "loss": 0.9763659238815308, + "loss_ce": 0.001756608602590859, + "loss_iou": 0.453125, + "loss_num": 0.01397705078125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 330446004, + "step": 2616 + }, + { + "epoch": 0.6713268774450074, + "grad_norm": 39.5313835144043, + "learning_rate": 5e-06, + "loss": 0.8941, + "num_input_tokens_seen": 330572832, + "step": 2617 + }, + { + "epoch": 0.6713268774450074, + "loss": 0.8067336082458496, + "loss_ce": 9.297236829297617e-05, + "loss_iou": 0.388671875, + "loss_num": 0.005706787109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 330572832, + "step": 2617 + }, + { + "epoch": 0.6715834028089528, + "grad_norm": 47.32804870605469, + "learning_rate": 5e-06, + "loss": 1.0336, + "num_input_tokens_seen": 330699568, + "step": 2618 + }, + { + "epoch": 0.6715834028089528, + "loss": 1.0725769996643066, + "loss_ce": 0.00055544706992805, + "loss_iou": 0.484375, + "loss_num": 0.0205078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 330699568, + "step": 2618 + }, + { + "epoch": 0.6718399281728981, + "grad_norm": 76.59149932861328, + "learning_rate": 5e-06, + "loss": 1.0651, + "num_input_tokens_seen": 330826532, + "step": 2619 + }, + { + "epoch": 0.6718399281728981, + "loss": 1.193537712097168, + "loss_ce": 0.000666723761241883, + "loss_iou": 0.53515625, + "loss_num": 0.0247802734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 330826532, + "step": 2619 + }, + { + "epoch": 0.6720964535368434, + "grad_norm": 50.16630172729492, + "learning_rate": 5e-06, + "loss": 1.1236, + "num_input_tokens_seen": 330952620, + "step": 2620 + }, + { + "epoch": 0.6720964535368434, + "loss": 1.0960618257522583, + "loss_ce": 0.0003586825623642653, + "loss_iou": 0.515625, + "loss_num": 0.0125732421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 330952620, + "step": 2620 + }, + { + "epoch": 0.6723529789007888, + "grad_norm": 22.544261932373047, + "learning_rate": 5e-06, + "loss": 0.8174, + "num_input_tokens_seen": 331079216, + "step": 2621 + }, + { + "epoch": 0.6723529789007888, + "loss": 0.8145396709442139, + "loss_ce": 8.658501610625535e-05, + "loss_iou": 0.3828125, + "loss_num": 0.009521484375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 331079216, + "step": 2621 + }, + { + "epoch": 0.6726095042647342, + "grad_norm": 21.712814331054688, + "learning_rate": 5e-06, + "loss": 1.0115, + "num_input_tokens_seen": 331206072, + "step": 2622 + }, + { + "epoch": 0.6726095042647342, + "loss": 0.732024073600769, + "loss_ce": 0.002287733368575573, + "loss_iou": 0.341796875, + "loss_num": 0.0093994140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 331206072, + "step": 2622 + }, + { + "epoch": 0.6728660296286796, + "grad_norm": 24.271188735961914, + "learning_rate": 5e-06, + "loss": 0.8571, + "num_input_tokens_seen": 331332124, + "step": 2623 + }, + { + "epoch": 0.6728660296286796, + "loss": 0.7758426666259766, + "loss_ce": 0.0004520603106357157, + "loss_iou": 0.359375, + "loss_num": 0.0115966796875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 331332124, + "step": 2623 + }, + { + "epoch": 0.6731225549926249, + "grad_norm": 45.45386505126953, + "learning_rate": 5e-06, + "loss": 1.0615, + "num_input_tokens_seen": 331459184, + "step": 2624 + }, + { + "epoch": 0.6731225549926249, + "loss": 1.170393943786621, + "loss_ce": 0.0012045535258948803, + "loss_iou": 0.498046875, + "loss_num": 0.034423828125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 331459184, + "step": 2624 + }, + { + "epoch": 0.6733790803565702, + "grad_norm": 52.11981201171875, + "learning_rate": 5e-06, + "loss": 1.0992, + "num_input_tokens_seen": 331585340, + "step": 2625 + }, + { + "epoch": 0.6733790803565702, + "loss": 1.1299861669540405, + "loss_ce": 0.00010332637612009421, + "loss_iou": 0.51171875, + "loss_num": 0.0216064453125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 331585340, + "step": 2625 + }, + { + "epoch": 0.6736356057205156, + "grad_norm": 67.46333312988281, + "learning_rate": 5e-06, + "loss": 1.1056, + "num_input_tokens_seen": 331711172, + "step": 2626 + }, + { + "epoch": 0.6736356057205156, + "loss": 1.1611733436584473, + "loss_ce": 0.0005287847598083317, + "loss_iou": 0.51171875, + "loss_num": 0.02685546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 331711172, + "step": 2626 + }, + { + "epoch": 0.673892131084461, + "grad_norm": 59.92705154418945, + "learning_rate": 5e-06, + "loss": 1.0489, + "num_input_tokens_seen": 331837380, + "step": 2627 + }, + { + "epoch": 0.673892131084461, + "loss": 1.3939323425292969, + "loss_ce": 0.00037761940620839596, + "loss_iou": 0.62109375, + "loss_num": 0.0311279296875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 331837380, + "step": 2627 + }, + { + "epoch": 0.6741486564484064, + "grad_norm": 59.04011154174805, + "learning_rate": 5e-06, + "loss": 0.9846, + "num_input_tokens_seen": 331964920, + "step": 2628 + }, + { + "epoch": 0.6741486564484064, + "loss": 1.0295162200927734, + "loss_ce": 0.0011959560215473175, + "loss_iou": 0.46875, + "loss_num": 0.0185546875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 331964920, + "step": 2628 + }, + { + "epoch": 0.6744051818123517, + "grad_norm": 50.19484329223633, + "learning_rate": 5e-06, + "loss": 0.9816, + "num_input_tokens_seen": 332091676, + "step": 2629 + }, + { + "epoch": 0.6744051818123517, + "loss": 0.95802241563797, + "loss_ce": 0.0039207953959703445, + "loss_iou": 0.443359375, + "loss_num": 0.01348876953125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 332091676, + "step": 2629 + }, + { + "epoch": 0.674661707176297, + "grad_norm": 21.684646606445312, + "learning_rate": 5e-06, + "loss": 0.9122, + "num_input_tokens_seen": 332217816, + "step": 2630 + }, + { + "epoch": 0.674661707176297, + "loss": 0.8740447163581848, + "loss_ce": 0.004415811970829964, + "loss_iou": 0.39453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 332217816, + "step": 2630 + }, + { + "epoch": 0.6749182325402424, + "grad_norm": 36.10974884033203, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 332344256, + "step": 2631 + }, + { + "epoch": 0.6749182325402424, + "loss": 1.1013665199279785, + "loss_ce": 0.0010248173493891954, + "loss_iou": 0.515625, + "loss_num": 0.0135498046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 332344256, + "step": 2631 + }, + { + "epoch": 0.6751747579041878, + "grad_norm": 61.22284698486328, + "learning_rate": 5e-06, + "loss": 0.9698, + "num_input_tokens_seen": 332470276, + "step": 2632 + }, + { + "epoch": 0.6751747579041878, + "loss": 0.8887766599655151, + "loss_ce": 0.00034897681325674057, + "loss_iou": 0.421875, + "loss_num": 0.00927734375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 332470276, + "step": 2632 + }, + { + "epoch": 0.6754312832681332, + "grad_norm": 50.0896110534668, + "learning_rate": 5e-06, + "loss": 1.0844, + "num_input_tokens_seen": 332596684, + "step": 2633 + }, + { + "epoch": 0.6754312832681332, + "loss": 1.210376501083374, + "loss_ce": 0.004321814514696598, + "loss_iou": 0.53515625, + "loss_num": 0.026611328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 332596684, + "step": 2633 + }, + { + "epoch": 0.6756878086320784, + "grad_norm": 32.03232192993164, + "learning_rate": 5e-06, + "loss": 0.9061, + "num_input_tokens_seen": 332722220, + "step": 2634 + }, + { + "epoch": 0.6756878086320784, + "loss": 0.9343405365943909, + "loss_ce": 0.0007467715768143535, + "loss_iou": 0.439453125, + "loss_num": 0.01104736328125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 332722220, + "step": 2634 + }, + { + "epoch": 0.6759443339960238, + "grad_norm": 39.76304626464844, + "learning_rate": 5e-06, + "loss": 0.9524, + "num_input_tokens_seen": 332847480, + "step": 2635 + }, + { + "epoch": 0.6759443339960238, + "loss": 0.9822904467582703, + "loss_ce": 0.0008451397297903895, + "loss_iou": 0.4609375, + "loss_num": 0.01214599609375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 332847480, + "step": 2635 + }, + { + "epoch": 0.6762008593599692, + "grad_norm": 70.54532623291016, + "learning_rate": 5e-06, + "loss": 0.943, + "num_input_tokens_seen": 332973584, + "step": 2636 + }, + { + "epoch": 0.6762008593599692, + "loss": 0.9069019556045532, + "loss_ce": 0.00016367467469535768, + "loss_iou": 0.423828125, + "loss_num": 0.01177978515625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 332973584, + "step": 2636 + }, + { + "epoch": 0.6764573847239146, + "grad_norm": 41.901947021484375, + "learning_rate": 5e-06, + "loss": 0.9125, + "num_input_tokens_seen": 333098256, + "step": 2637 + }, + { + "epoch": 0.6764573847239146, + "loss": 0.9805340766906738, + "loss_ce": 0.002262656344100833, + "loss_iou": 0.4453125, + "loss_num": 0.0174560546875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 333098256, + "step": 2637 + }, + { + "epoch": 0.67671391008786, + "grad_norm": 39.998374938964844, + "learning_rate": 5e-06, + "loss": 0.9543, + "num_input_tokens_seen": 333224072, + "step": 2638 + }, + { + "epoch": 0.67671391008786, + "loss": 1.0625267028808594, + "loss_ce": 0.0005150529905222356, + "loss_iou": 0.50390625, + "loss_num": 0.01123046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 333224072, + "step": 2638 + }, + { + "epoch": 0.6769704354518054, + "grad_norm": 39.6991081237793, + "learning_rate": 5e-06, + "loss": 0.9621, + "num_input_tokens_seen": 333350020, + "step": 2639 + }, + { + "epoch": 0.6769704354518054, + "loss": 0.9661704897880554, + "loss_ce": 0.0003502287436276674, + "loss_iou": 0.451171875, + "loss_num": 0.0126953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 333350020, + "step": 2639 + }, + { + "epoch": 0.6772269608157506, + "grad_norm": 44.844139099121094, + "learning_rate": 5e-06, + "loss": 0.8583, + "num_input_tokens_seen": 333477040, + "step": 2640 + }, + { + "epoch": 0.6772269608157506, + "loss": 0.851862370967865, + "loss_ce": 0.002741270000115037, + "loss_iou": 0.40234375, + "loss_num": 0.00872802734375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 333477040, + "step": 2640 + }, + { + "epoch": 0.677483486179696, + "grad_norm": 70.47373962402344, + "learning_rate": 5e-06, + "loss": 1.0566, + "num_input_tokens_seen": 333603772, + "step": 2641 + }, + { + "epoch": 0.677483486179696, + "loss": 1.0638738870620728, + "loss_ce": 0.0018621678464114666, + "loss_iou": 0.490234375, + "loss_num": 0.0166015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 333603772, + "step": 2641 + }, + { + "epoch": 0.6777400115436414, + "grad_norm": 44.66590118408203, + "learning_rate": 5e-06, + "loss": 0.838, + "num_input_tokens_seen": 333729416, + "step": 2642 + }, + { + "epoch": 0.6777400115436414, + "loss": 0.7406109571456909, + "loss_ce": 0.000620744307525456, + "loss_iou": 0.349609375, + "loss_num": 0.0084228515625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 333729416, + "step": 2642 + }, + { + "epoch": 0.6779965369075868, + "grad_norm": 37.84136962890625, + "learning_rate": 5e-06, + "loss": 0.8783, + "num_input_tokens_seen": 333856588, + "step": 2643 + }, + { + "epoch": 0.6779965369075868, + "loss": 0.8214726448059082, + "loss_ce": 0.0006718781078234315, + "loss_iou": 0.392578125, + "loss_num": 0.0069580078125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 333856588, + "step": 2643 + }, + { + "epoch": 0.6782530622715321, + "grad_norm": 39.2398567199707, + "learning_rate": 5e-06, + "loss": 1.1343, + "num_input_tokens_seen": 333982456, + "step": 2644 + }, + { + "epoch": 0.6782530622715321, + "loss": 1.3410954475402832, + "loss_ce": 0.002716601826250553, + "loss_iou": 0.58203125, + "loss_num": 0.034912109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 333982456, + "step": 2644 + }, + { + "epoch": 0.6785095876354774, + "grad_norm": 58.09175109863281, + "learning_rate": 5e-06, + "loss": 0.9793, + "num_input_tokens_seen": 334110220, + "step": 2645 + }, + { + "epoch": 0.6785095876354774, + "loss": 1.2050714492797852, + "loss_ce": 0.002434713765978813, + "loss_iou": 0.5390625, + "loss_num": 0.0244140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 334110220, + "step": 2645 + }, + { + "epoch": 0.6787661129994228, + "grad_norm": 51.74338912963867, + "learning_rate": 5e-06, + "loss": 1.032, + "num_input_tokens_seen": 334236800, + "step": 2646 + }, + { + "epoch": 0.6787661129994228, + "loss": 0.8497709035873413, + "loss_ce": 0.002602929947897792, + "loss_iou": 0.400390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 334236800, + "step": 2646 + }, + { + "epoch": 0.6790226383633682, + "grad_norm": 35.887535095214844, + "learning_rate": 5e-06, + "loss": 1.0008, + "num_input_tokens_seen": 334362132, + "step": 2647 + }, + { + "epoch": 0.6790226383633682, + "loss": 0.7992653250694275, + "loss_ce": 0.001413752674125135, + "loss_iou": 0.369140625, + "loss_num": 0.0115966796875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 334362132, + "step": 2647 + }, + { + "epoch": 0.6792791637273136, + "grad_norm": 37.054656982421875, + "learning_rate": 5e-06, + "loss": 0.915, + "num_input_tokens_seen": 334487020, + "step": 2648 + }, + { + "epoch": 0.6792791637273136, + "loss": 0.9035756587982178, + "loss_ce": 0.0004994976334273815, + "loss_iou": 0.427734375, + "loss_num": 0.00946044921875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 334487020, + "step": 2648 + }, + { + "epoch": 0.6795356890912589, + "grad_norm": 86.69132232666016, + "learning_rate": 5e-06, + "loss": 1.0476, + "num_input_tokens_seen": 334614148, + "step": 2649 + }, + { + "epoch": 0.6795356890912589, + "loss": 1.0119752883911133, + "loss_ce": 0.003674438688904047, + "loss_iou": 0.482421875, + "loss_num": 0.00909423828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 334614148, + "step": 2649 + }, + { + "epoch": 0.6797922144552042, + "grad_norm": 58.45771408081055, + "learning_rate": 5e-06, + "loss": 1.002, + "num_input_tokens_seen": 334740044, + "step": 2650 + }, + { + "epoch": 0.6797922144552042, + "loss": 1.0325878858566284, + "loss_ce": 0.004755828063935041, + "loss_iou": 0.47265625, + "loss_num": 0.0162353515625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 334740044, + "step": 2650 + }, + { + "epoch": 0.6800487398191496, + "grad_norm": 36.90290069580078, + "learning_rate": 5e-06, + "loss": 0.9857, + "num_input_tokens_seen": 334866680, + "step": 2651 + }, + { + "epoch": 0.6800487398191496, + "loss": 0.838642418384552, + "loss_ce": 0.0002635386190377176, + "loss_iou": 0.40234375, + "loss_num": 0.007049560546875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 334866680, + "step": 2651 + }, + { + "epoch": 0.680305265183095, + "grad_norm": 30.725276947021484, + "learning_rate": 5e-06, + "loss": 0.9262, + "num_input_tokens_seen": 334993188, + "step": 2652 + }, + { + "epoch": 0.680305265183095, + "loss": 0.7201814651489258, + "loss_ce": 0.00045490573393180966, + "loss_iou": 0.341796875, + "loss_num": 0.006866455078125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 334993188, + "step": 2652 + }, + { + "epoch": 0.6805617905470404, + "grad_norm": 59.09490966796875, + "learning_rate": 5e-06, + "loss": 0.9335, + "num_input_tokens_seen": 335120428, + "step": 2653 + }, + { + "epoch": 0.6805617905470404, + "loss": 0.8974853754043579, + "loss_ce": 0.004907255060970783, + "loss_iou": 0.42578125, + "loss_num": 0.0078125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 335120428, + "step": 2653 + }, + { + "epoch": 0.6808183159109857, + "grad_norm": 55.028114318847656, + "learning_rate": 5e-06, + "loss": 1.0219, + "num_input_tokens_seen": 335245748, + "step": 2654 + }, + { + "epoch": 0.6808183159109857, + "loss": 1.1647305488586426, + "loss_ce": 0.0011563095031306148, + "loss_iou": 0.52734375, + "loss_num": 0.021240234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 335245748, + "step": 2654 + }, + { + "epoch": 0.681074841274931, + "grad_norm": 26.385570526123047, + "learning_rate": 5e-06, + "loss": 0.8858, + "num_input_tokens_seen": 335371108, + "step": 2655 + }, + { + "epoch": 0.681074841274931, + "loss": 0.7679165601730347, + "loss_ce": 0.0003384568844921887, + "loss_iou": 0.359375, + "loss_num": 0.0096435546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 335371108, + "step": 2655 + }, + { + "epoch": 0.6813313666388764, + "grad_norm": 41.15224838256836, + "learning_rate": 5e-06, + "loss": 0.9392, + "num_input_tokens_seen": 335497760, + "step": 2656 + }, + { + "epoch": 0.6813313666388764, + "loss": 0.9213315844535828, + "loss_ce": 0.0018979882588610053, + "loss_iou": 0.42578125, + "loss_num": 0.0133056640625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 335497760, + "step": 2656 + }, + { + "epoch": 0.6815878920028218, + "grad_norm": 53.84215545654297, + "learning_rate": 5e-06, + "loss": 0.9773, + "num_input_tokens_seen": 335624792, + "step": 2657 + }, + { + "epoch": 0.6815878920028218, + "loss": 0.9436874985694885, + "loss_ce": 8.397691999562085e-05, + "loss_iou": 0.4296875, + "loss_num": 0.016845703125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 335624792, + "step": 2657 + }, + { + "epoch": 0.6818444173667672, + "grad_norm": 48.227294921875, + "learning_rate": 5e-06, + "loss": 1.0054, + "num_input_tokens_seen": 335751152, + "step": 2658 + }, + { + "epoch": 0.6818444173667672, + "loss": 0.9750728607177734, + "loss_ce": 0.0024166181683540344, + "loss_iou": 0.447265625, + "loss_num": 0.015625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 335751152, + "step": 2658 + }, + { + "epoch": 0.6821009427307125, + "grad_norm": 41.31946563720703, + "learning_rate": 5e-06, + "loss": 0.9794, + "num_input_tokens_seen": 335877780, + "step": 2659 + }, + { + "epoch": 0.6821009427307125, + "loss": 0.9192448258399963, + "loss_ce": 0.00322920735925436, + "loss_iou": 0.416015625, + "loss_num": 0.016357421875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 335877780, + "step": 2659 + }, + { + "epoch": 0.6823574680946578, + "grad_norm": 56.505393981933594, + "learning_rate": 5e-06, + "loss": 1.0202, + "num_input_tokens_seen": 336004252, + "step": 2660 + }, + { + "epoch": 0.6823574680946578, + "loss": 0.9573144912719727, + "loss_ce": 0.003212913405150175, + "loss_iou": 0.439453125, + "loss_num": 0.01513671875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 336004252, + "step": 2660 + }, + { + "epoch": 0.6826139934586032, + "grad_norm": 101.24784851074219, + "learning_rate": 5e-06, + "loss": 0.9807, + "num_input_tokens_seen": 336131160, + "step": 2661 + }, + { + "epoch": 0.6826139934586032, + "loss": 0.8865087032318115, + "loss_ce": 0.0012548131635412574, + "loss_iou": 0.427734375, + "loss_num": 0.005767822265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 336131160, + "step": 2661 + }, + { + "epoch": 0.6828705188225486, + "grad_norm": 49.149051666259766, + "learning_rate": 5e-06, + "loss": 0.9886, + "num_input_tokens_seen": 336257948, + "step": 2662 + }, + { + "epoch": 0.6828705188225486, + "loss": 1.0268540382385254, + "loss_ce": 0.0019517116015776992, + "loss_iou": 0.466796875, + "loss_num": 0.0184326171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 336257948, + "step": 2662 + }, + { + "epoch": 0.683127044186494, + "grad_norm": 27.82670783996582, + "learning_rate": 5e-06, + "loss": 0.9931, + "num_input_tokens_seen": 336383532, + "step": 2663 + }, + { + "epoch": 0.683127044186494, + "loss": 0.956154465675354, + "loss_ce": 0.0005880504613742232, + "loss_iou": 0.44140625, + "loss_num": 0.0146484375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 336383532, + "step": 2663 + }, + { + "epoch": 0.6833835695504393, + "grad_norm": 32.34475326538086, + "learning_rate": 5e-06, + "loss": 0.9423, + "num_input_tokens_seen": 336509344, + "step": 2664 + }, + { + "epoch": 0.6833835695504393, + "loss": 0.9320732951164246, + "loss_ce": 0.004338920582085848, + "loss_iou": 0.4375, + "loss_num": 0.01092529296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 336509344, + "step": 2664 + }, + { + "epoch": 0.6836400949143847, + "grad_norm": 41.593528747558594, + "learning_rate": 5e-06, + "loss": 1.0129, + "num_input_tokens_seen": 336636124, + "step": 2665 + }, + { + "epoch": 0.6836400949143847, + "loss": 1.1943963766098022, + "loss_ce": 0.0005486942827701569, + "loss_iou": 0.53125, + "loss_num": 0.0263671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 336636124, + "step": 2665 + }, + { + "epoch": 0.68389662027833, + "grad_norm": 39.749794006347656, + "learning_rate": 5e-06, + "loss": 0.8781, + "num_input_tokens_seen": 336762276, + "step": 2666 + }, + { + "epoch": 0.68389662027833, + "loss": 0.8173080682754517, + "loss_ce": 0.0006576706655323505, + "loss_iou": 0.3828125, + "loss_num": 0.01031494140625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 336762276, + "step": 2666 + }, + { + "epoch": 0.6841531456422754, + "grad_norm": 53.206905364990234, + "learning_rate": 5e-06, + "loss": 0.9554, + "num_input_tokens_seen": 336889712, + "step": 2667 + }, + { + "epoch": 0.6841531456422754, + "loss": 0.9869515299797058, + "loss_ce": 0.002820669673383236, + "loss_iou": 0.443359375, + "loss_num": 0.01953125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 336889712, + "step": 2667 + }, + { + "epoch": 0.6844096710062207, + "grad_norm": 43.915008544921875, + "learning_rate": 5e-06, + "loss": 1.0257, + "num_input_tokens_seen": 337016016, + "step": 2668 + }, + { + "epoch": 0.6844096710062207, + "loss": 0.8745729923248291, + "loss_ce": 0.001526065170764923, + "loss_iou": 0.4140625, + "loss_num": 0.0093994140625, + "loss_xval": 0.875, + "num_input_tokens_seen": 337016016, + "step": 2668 + }, + { + "epoch": 0.6846661963701661, + "grad_norm": 29.643739700317383, + "learning_rate": 5e-06, + "loss": 0.9069, + "num_input_tokens_seen": 337143872, + "step": 2669 + }, + { + "epoch": 0.6846661963701661, + "loss": 0.9747914671897888, + "loss_ce": 0.0011586518958210945, + "loss_iou": 0.44921875, + "loss_num": 0.01519775390625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 337143872, + "step": 2669 + }, + { + "epoch": 0.6849227217341115, + "grad_norm": 41.910369873046875, + "learning_rate": 5e-06, + "loss": 0.9872, + "num_input_tokens_seen": 337269896, + "step": 2670 + }, + { + "epoch": 0.6849227217341115, + "loss": 1.0998716354370117, + "loss_ce": 0.0012388963950797915, + "loss_iou": 0.48828125, + "loss_num": 0.02490234375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 337269896, + "step": 2670 + }, + { + "epoch": 0.6851792470980568, + "grad_norm": 34.42910385131836, + "learning_rate": 5e-06, + "loss": 0.9444, + "num_input_tokens_seen": 337396240, + "step": 2671 + }, + { + "epoch": 0.6851792470980568, + "loss": 0.8935791254043579, + "loss_ce": 0.00466315308585763, + "loss_iou": 0.41015625, + "loss_num": 0.01397705078125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 337396240, + "step": 2671 + }, + { + "epoch": 0.6854357724620022, + "grad_norm": 34.492427825927734, + "learning_rate": 5e-06, + "loss": 0.9888, + "num_input_tokens_seen": 337522984, + "step": 2672 + }, + { + "epoch": 0.6854357724620022, + "loss": 0.9644553065299988, + "loss_ce": 0.004982654936611652, + "loss_iou": 0.44921875, + "loss_num": 0.012451171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 337522984, + "step": 2672 + }, + { + "epoch": 0.6856922978259475, + "grad_norm": 24.0233097076416, + "learning_rate": 5e-06, + "loss": 0.8885, + "num_input_tokens_seen": 337649404, + "step": 2673 + }, + { + "epoch": 0.6856922978259475, + "loss": 0.9603058695793152, + "loss_ce": 0.0018098073778674006, + "loss_iou": 0.4453125, + "loss_num": 0.01348876953125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 337649404, + "step": 2673 + }, + { + "epoch": 0.6859488231898929, + "grad_norm": 42.25082015991211, + "learning_rate": 5e-06, + "loss": 1.1109, + "num_input_tokens_seen": 337774848, + "step": 2674 + }, + { + "epoch": 0.6859488231898929, + "loss": 1.2508373260498047, + "loss_ce": 0.0018139845924451947, + "loss_iou": 0.56640625, + "loss_num": 0.02294921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 337774848, + "step": 2674 + }, + { + "epoch": 0.6862053485538383, + "grad_norm": 36.796085357666016, + "learning_rate": 5e-06, + "loss": 1.0127, + "num_input_tokens_seen": 337900608, + "step": 2675 + }, + { + "epoch": 0.6862053485538383, + "loss": 0.84504234790802, + "loss_ce": 7.160591485444456e-05, + "loss_iou": 0.39453125, + "loss_num": 0.010986328125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 337900608, + "step": 2675 + }, + { + "epoch": 0.6864618739177836, + "grad_norm": 50.710540771484375, + "learning_rate": 5e-06, + "loss": 0.9681, + "num_input_tokens_seen": 338027120, + "step": 2676 + }, + { + "epoch": 0.6864618739177836, + "loss": 1.051001787185669, + "loss_ce": 0.0011971069034188986, + "loss_iou": 0.466796875, + "loss_num": 0.0228271484375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 338027120, + "step": 2676 + }, + { + "epoch": 0.686718399281729, + "grad_norm": 41.053321838378906, + "learning_rate": 5e-06, + "loss": 1.0076, + "num_input_tokens_seen": 338152516, + "step": 2677 + }, + { + "epoch": 0.686718399281729, + "loss": 1.1055819988250732, + "loss_ce": 0.0015782143454998732, + "loss_iou": 0.5078125, + "loss_num": 0.0172119140625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 338152516, + "step": 2677 + }, + { + "epoch": 0.6869749246456743, + "grad_norm": 25.512685775756836, + "learning_rate": 5e-06, + "loss": 0.8785, + "num_input_tokens_seen": 338278136, + "step": 2678 + }, + { + "epoch": 0.6869749246456743, + "loss": 0.7821996212005615, + "loss_ce": 0.0002172474196413532, + "loss_iou": 0.375, + "loss_num": 0.006622314453125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 338278136, + "step": 2678 + }, + { + "epoch": 0.6872314500096197, + "grad_norm": 35.911598205566406, + "learning_rate": 5e-06, + "loss": 1.0224, + "num_input_tokens_seen": 338403848, + "step": 2679 + }, + { + "epoch": 0.6872314500096197, + "loss": 1.0616683959960938, + "loss_ce": 0.0013656590599566698, + "loss_iou": 0.478515625, + "loss_num": 0.020263671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 338403848, + "step": 2679 + }, + { + "epoch": 0.6874879753735651, + "grad_norm": 77.37936401367188, + "learning_rate": 5e-06, + "loss": 1.0095, + "num_input_tokens_seen": 338530972, + "step": 2680 + }, + { + "epoch": 0.6874879753735651, + "loss": 0.9487123489379883, + "loss_ce": 0.00047017098404467106, + "loss_iou": 0.4453125, + "loss_num": 0.0118408203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 338530972, + "step": 2680 + }, + { + "epoch": 0.6877445007375104, + "grad_norm": 42.757564544677734, + "learning_rate": 5e-06, + "loss": 0.9868, + "num_input_tokens_seen": 338656348, + "step": 2681 + }, + { + "epoch": 0.6877445007375104, + "loss": 0.9803095459938049, + "loss_ce": 0.00032907279091887176, + "loss_iou": 0.46484375, + "loss_num": 0.01019287109375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 338656348, + "step": 2681 + }, + { + "epoch": 0.6880010261014557, + "grad_norm": 42.65533447265625, + "learning_rate": 5e-06, + "loss": 0.7984, + "num_input_tokens_seen": 338782484, + "step": 2682 + }, + { + "epoch": 0.6880010261014557, + "loss": 0.7567192316055298, + "loss_ce": 0.0023246773052960634, + "loss_iou": 0.34765625, + "loss_num": 0.01165771484375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 338782484, + "step": 2682 + }, + { + "epoch": 0.6882575514654011, + "grad_norm": 58.975624084472656, + "learning_rate": 5e-06, + "loss": 0.8581, + "num_input_tokens_seen": 338909052, + "step": 2683 + }, + { + "epoch": 0.6882575514654011, + "loss": 0.8836138844490051, + "loss_ce": 6.895825936226174e-05, + "loss_iou": 0.419921875, + "loss_num": 0.008544921875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 338909052, + "step": 2683 + }, + { + "epoch": 0.6885140768293465, + "grad_norm": 51.10431671142578, + "learning_rate": 5e-06, + "loss": 0.9764, + "num_input_tokens_seen": 339035220, + "step": 2684 + }, + { + "epoch": 0.6885140768293465, + "loss": 0.9259170293807983, + "loss_ce": 0.00013576692435890436, + "loss_iou": 0.423828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 339035220, + "step": 2684 + }, + { + "epoch": 0.6887706021932919, + "grad_norm": 43.108421325683594, + "learning_rate": 5e-06, + "loss": 1.0269, + "num_input_tokens_seen": 339161712, + "step": 2685 + }, + { + "epoch": 0.6887706021932919, + "loss": 1.107222318649292, + "loss_ce": 0.00688056368380785, + "loss_iou": 0.48828125, + "loss_num": 0.0247802734375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 339161712, + "step": 2685 + }, + { + "epoch": 0.6890271275572373, + "grad_norm": 50.314964294433594, + "learning_rate": 5e-06, + "loss": 1.0085, + "num_input_tokens_seen": 339288076, + "step": 2686 + }, + { + "epoch": 0.6890271275572373, + "loss": 1.1594195365905762, + "loss_ce": 0.0046343812718987465, + "loss_iou": 0.5078125, + "loss_num": 0.0283203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 339288076, + "step": 2686 + }, + { + "epoch": 0.6892836529211825, + "grad_norm": 73.81104278564453, + "learning_rate": 5e-06, + "loss": 0.9643, + "num_input_tokens_seen": 339414844, + "step": 2687 + }, + { + "epoch": 0.6892836529211825, + "loss": 1.0992200374603271, + "loss_ce": 0.00449356809258461, + "loss_iou": 0.48828125, + "loss_num": 0.0234375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 339414844, + "step": 2687 + }, + { + "epoch": 0.6895401782851279, + "grad_norm": 54.181095123291016, + "learning_rate": 5e-06, + "loss": 1.1201, + "num_input_tokens_seen": 339541108, + "step": 2688 + }, + { + "epoch": 0.6895401782851279, + "loss": 1.049062728881836, + "loss_ce": 0.0007228367030620575, + "loss_iou": 0.494140625, + "loss_num": 0.01220703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 339541108, + "step": 2688 + }, + { + "epoch": 0.6897967036490733, + "grad_norm": 26.643260955810547, + "learning_rate": 5e-06, + "loss": 0.8724, + "num_input_tokens_seen": 339666892, + "step": 2689 + }, + { + "epoch": 0.6897967036490733, + "loss": 0.8100067377090454, + "loss_ce": 0.0016571050509810448, + "loss_iou": 0.37890625, + "loss_num": 0.010498046875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 339666892, + "step": 2689 + }, + { + "epoch": 0.6900532290130187, + "grad_norm": 30.400203704833984, + "learning_rate": 5e-06, + "loss": 0.9468, + "num_input_tokens_seen": 339793796, + "step": 2690 + }, + { + "epoch": 0.6900532290130187, + "loss": 0.9118818044662476, + "loss_ce": 0.0012372962664812803, + "loss_iou": 0.427734375, + "loss_num": 0.010986328125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 339793796, + "step": 2690 + }, + { + "epoch": 0.6903097543769641, + "grad_norm": 45.60205841064453, + "learning_rate": 5e-06, + "loss": 0.9926, + "num_input_tokens_seen": 339920916, + "step": 2691 + }, + { + "epoch": 0.6903097543769641, + "loss": 0.9568983316421509, + "loss_ce": 0.00182017358019948, + "loss_iou": 0.427734375, + "loss_num": 0.01953125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 339920916, + "step": 2691 + }, + { + "epoch": 0.6905662797409093, + "grad_norm": 32.39512252807617, + "learning_rate": 5e-06, + "loss": 0.8911, + "num_input_tokens_seen": 340046044, + "step": 2692 + }, + { + "epoch": 0.6905662797409093, + "loss": 0.8389978408813477, + "loss_ce": 0.0003747821319848299, + "loss_iou": 0.40625, + "loss_num": 0.00506591796875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 340046044, + "step": 2692 + }, + { + "epoch": 0.6908228051048547, + "grad_norm": 40.34032440185547, + "learning_rate": 5e-06, + "loss": 0.9729, + "num_input_tokens_seen": 340172784, + "step": 2693 + }, + { + "epoch": 0.6908228051048547, + "loss": 0.8655314445495605, + "loss_ce": 0.0017619330901652575, + "loss_iou": 0.39453125, + "loss_num": 0.01513671875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 340172784, + "step": 2693 + }, + { + "epoch": 0.6910793304688001, + "grad_norm": 38.03837203979492, + "learning_rate": 5e-06, + "loss": 1.0056, + "num_input_tokens_seen": 340297676, + "step": 2694 + }, + { + "epoch": 0.6910793304688001, + "loss": 0.8204924464225769, + "loss_ce": 0.00017995102098211646, + "loss_iou": 0.390625, + "loss_num": 0.0074462890625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 340297676, + "step": 2694 + }, + { + "epoch": 0.6913358558327455, + "grad_norm": 52.592254638671875, + "learning_rate": 5e-06, + "loss": 0.9584, + "num_input_tokens_seen": 340424468, + "step": 2695 + }, + { + "epoch": 0.6913358558327455, + "loss": 0.932125985622406, + "loss_ce": 0.0009736352949403226, + "loss_iou": 0.4296875, + "loss_num": 0.01416015625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 340424468, + "step": 2695 + }, + { + "epoch": 0.6915923811966909, + "grad_norm": 68.29264068603516, + "learning_rate": 5e-06, + "loss": 0.8313, + "num_input_tokens_seen": 340550840, + "step": 2696 + }, + { + "epoch": 0.6915923811966909, + "loss": 1.0239650011062622, + "loss_ce": 0.003457213519141078, + "loss_iou": 0.474609375, + "loss_num": 0.01446533203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 340550840, + "step": 2696 + }, + { + "epoch": 0.6918489065606361, + "grad_norm": 48.940948486328125, + "learning_rate": 5e-06, + "loss": 1.0045, + "num_input_tokens_seen": 340677096, + "step": 2697 + }, + { + "epoch": 0.6918489065606361, + "loss": 0.881516695022583, + "loss_ce": 0.0006573515711352229, + "loss_iou": 0.41015625, + "loss_num": 0.01171875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 340677096, + "step": 2697 + }, + { + "epoch": 0.6921054319245815, + "grad_norm": 38.74660110473633, + "learning_rate": 5e-06, + "loss": 0.9172, + "num_input_tokens_seen": 340803316, + "step": 2698 + }, + { + "epoch": 0.6921054319245815, + "loss": 1.0077579021453857, + "loss_ce": 0.0009219619678333402, + "loss_iou": 0.4765625, + "loss_num": 0.01019287109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 340803316, + "step": 2698 + }, + { + "epoch": 0.6923619572885269, + "grad_norm": 54.396278381347656, + "learning_rate": 5e-06, + "loss": 1.0235, + "num_input_tokens_seen": 340929376, + "step": 2699 + }, + { + "epoch": 0.6923619572885269, + "loss": 1.018249750137329, + "loss_ce": 0.0006715651834383607, + "loss_iou": 0.466796875, + "loss_num": 0.0164794921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 340929376, + "step": 2699 + }, + { + "epoch": 0.6926184826524723, + "grad_norm": 70.2328109741211, + "learning_rate": 5e-06, + "loss": 0.988, + "num_input_tokens_seen": 341055360, + "step": 2700 + }, + { + "epoch": 0.6926184826524723, + "loss": 0.9376966953277588, + "loss_ce": 0.0001966974523384124, + "loss_iou": 0.431640625, + "loss_num": 0.01470947265625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 341055360, + "step": 2700 + }, + { + "epoch": 0.6928750080164177, + "grad_norm": 51.26341247558594, + "learning_rate": 5e-06, + "loss": 0.901, + "num_input_tokens_seen": 341181828, + "step": 2701 + }, + { + "epoch": 0.6928750080164177, + "loss": 0.8426923155784607, + "loss_ce": 0.002360300859436393, + "loss_iou": 0.40234375, + "loss_num": 0.007232666015625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 341181828, + "step": 2701 + }, + { + "epoch": 0.6931315333803629, + "grad_norm": 60.08789825439453, + "learning_rate": 5e-06, + "loss": 0.9972, + "num_input_tokens_seen": 341306808, + "step": 2702 + }, + { + "epoch": 0.6931315333803629, + "loss": 0.8673467636108398, + "loss_ce": 0.001135853584855795, + "loss_iou": 0.400390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 341306808, + "step": 2702 + }, + { + "epoch": 0.6933880587443083, + "grad_norm": 52.51971435546875, + "learning_rate": 5e-06, + "loss": 1.1221, + "num_input_tokens_seen": 341433696, + "step": 2703 + }, + { + "epoch": 0.6933880587443083, + "loss": 0.8270750045776367, + "loss_ce": 0.0004148438456468284, + "loss_iou": 0.396484375, + "loss_num": 0.00701904296875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 341433696, + "step": 2703 + }, + { + "epoch": 0.6936445841082537, + "grad_norm": 67.77469635009766, + "learning_rate": 5e-06, + "loss": 0.9738, + "num_input_tokens_seen": 341559752, + "step": 2704 + }, + { + "epoch": 0.6936445841082537, + "loss": 0.8000087738037109, + "loss_ce": 0.001180693507194519, + "loss_iou": 0.380859375, + "loss_num": 0.00762939453125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 341559752, + "step": 2704 + }, + { + "epoch": 0.6939011094721991, + "grad_norm": 46.74452590942383, + "learning_rate": 5e-06, + "loss": 1.0463, + "num_input_tokens_seen": 341685812, + "step": 2705 + }, + { + "epoch": 0.6939011094721991, + "loss": 0.964853048324585, + "loss_ce": 0.004647955764085054, + "loss_iou": 0.443359375, + "loss_num": 0.01483154296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 341685812, + "step": 2705 + }, + { + "epoch": 0.6941576348361445, + "grad_norm": 24.56528091430664, + "learning_rate": 5e-06, + "loss": 0.9229, + "num_input_tokens_seen": 341810548, + "step": 2706 + }, + { + "epoch": 0.6941576348361445, + "loss": 0.9695167541503906, + "loss_ce": 0.004184698220342398, + "loss_iou": 0.43359375, + "loss_num": 0.01953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 341810548, + "step": 2706 + }, + { + "epoch": 0.6944141602000898, + "grad_norm": 36.977413177490234, + "learning_rate": 5e-06, + "loss": 0.8667, + "num_input_tokens_seen": 341937488, + "step": 2707 + }, + { + "epoch": 0.6944141602000898, + "loss": 0.8739184141159058, + "loss_ce": 0.0008715341100469232, + "loss_iou": 0.4140625, + "loss_num": 0.0087890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 341937488, + "step": 2707 + }, + { + "epoch": 0.6946706855640351, + "grad_norm": 59.94297790527344, + "learning_rate": 5e-06, + "loss": 0.9319, + "num_input_tokens_seen": 342064388, + "step": 2708 + }, + { + "epoch": 0.6946706855640351, + "loss": 0.9138249158859253, + "loss_ce": 0.0012272614985704422, + "loss_iou": 0.42578125, + "loss_num": 0.01220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 342064388, + "step": 2708 + }, + { + "epoch": 0.6949272109279805, + "grad_norm": 43.3054084777832, + "learning_rate": 5e-06, + "loss": 0.9617, + "num_input_tokens_seen": 342191664, + "step": 2709 + }, + { + "epoch": 0.6949272109279805, + "loss": 1.0077342987060547, + "loss_ce": 0.0004100327496416867, + "loss_iou": 0.466796875, + "loss_num": 0.01483154296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 342191664, + "step": 2709 + }, + { + "epoch": 0.6951837362919259, + "grad_norm": 43.03889465332031, + "learning_rate": 5e-06, + "loss": 0.854, + "num_input_tokens_seen": 342317860, + "step": 2710 + }, + { + "epoch": 0.6951837362919259, + "loss": 0.580342710018158, + "loss_ce": 0.0009970084065571427, + "loss_iou": 0.27734375, + "loss_num": 0.004730224609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 342317860, + "step": 2710 + }, + { + "epoch": 0.6954402616558713, + "grad_norm": 61.31053161621094, + "learning_rate": 5e-06, + "loss": 0.9481, + "num_input_tokens_seen": 342443800, + "step": 2711 + }, + { + "epoch": 0.6954402616558713, + "loss": 0.8608295917510986, + "loss_ce": 0.0004779960436280817, + "loss_iou": 0.3984375, + "loss_num": 0.01312255859375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 342443800, + "step": 2711 + }, + { + "epoch": 0.6956967870198166, + "grad_norm": 70.694091796875, + "learning_rate": 5e-06, + "loss": 1.0076, + "num_input_tokens_seen": 342571276, + "step": 2712 + }, + { + "epoch": 0.6956967870198166, + "loss": 0.9104338884353638, + "loss_ce": 0.0022307434119284153, + "loss_iou": 0.427734375, + "loss_num": 0.010498046875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 342571276, + "step": 2712 + }, + { + "epoch": 0.6959533123837619, + "grad_norm": 44.73869323730469, + "learning_rate": 5e-06, + "loss": 1.1443, + "num_input_tokens_seen": 342697340, + "step": 2713 + }, + { + "epoch": 0.6959533123837619, + "loss": 0.7460817098617554, + "loss_ce": 0.00023210421204566956, + "loss_iou": 0.361328125, + "loss_num": 0.005096435546875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 342697340, + "step": 2713 + }, + { + "epoch": 0.6962098377477073, + "grad_norm": 27.575767517089844, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 342823164, + "step": 2714 + }, + { + "epoch": 0.6962098377477073, + "loss": 0.9147521257400513, + "loss_ce": 0.002398640615865588, + "loss_iou": 0.419921875, + "loss_num": 0.0145263671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 342823164, + "step": 2714 + }, + { + "epoch": 0.6964663631116527, + "grad_norm": 20.031452178955078, + "learning_rate": 5e-06, + "loss": 0.9287, + "num_input_tokens_seen": 342949252, + "step": 2715 + }, + { + "epoch": 0.6964663631116527, + "loss": 1.0050671100616455, + "loss_ce": 0.0011608228087425232, + "loss_iou": 0.443359375, + "loss_num": 0.0235595703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 342949252, + "step": 2715 + }, + { + "epoch": 0.696722888475598, + "grad_norm": 46.31381607055664, + "learning_rate": 5e-06, + "loss": 0.8859, + "num_input_tokens_seen": 343075168, + "step": 2716 + }, + { + "epoch": 0.696722888475598, + "loss": 0.8170726299285889, + "loss_ce": 0.002131205517798662, + "loss_iou": 0.37890625, + "loss_num": 0.01129150390625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 343075168, + "step": 2716 + }, + { + "epoch": 0.6969794138395434, + "grad_norm": 30.96465492248535, + "learning_rate": 5e-06, + "loss": 0.9029, + "num_input_tokens_seen": 343200668, + "step": 2717 + }, + { + "epoch": 0.6969794138395434, + "loss": 0.8198245763778687, + "loss_ce": 0.004639067221432924, + "loss_iou": 0.3828125, + "loss_num": 0.00970458984375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 343200668, + "step": 2717 + }, + { + "epoch": 0.6972359392034887, + "grad_norm": 27.06302261352539, + "learning_rate": 5e-06, + "loss": 0.8512, + "num_input_tokens_seen": 343326740, + "step": 2718 + }, + { + "epoch": 0.6972359392034887, + "loss": 0.8156639337539673, + "loss_ce": 0.0009666308760643005, + "loss_iou": 0.390625, + "loss_num": 0.006622314453125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 343326740, + "step": 2718 + }, + { + "epoch": 0.6974924645674341, + "grad_norm": 56.046234130859375, + "learning_rate": 5e-06, + "loss": 1.0681, + "num_input_tokens_seen": 343453308, + "step": 2719 + }, + { + "epoch": 0.6974924645674341, + "loss": 1.1742287874221802, + "loss_ce": 0.0008889258606359363, + "loss_iou": 0.5390625, + "loss_num": 0.019775390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 343453308, + "step": 2719 + }, + { + "epoch": 0.6977489899313795, + "grad_norm": 50.78541946411133, + "learning_rate": 5e-06, + "loss": 1.0008, + "num_input_tokens_seen": 343580252, + "step": 2720 + }, + { + "epoch": 0.6977489899313795, + "loss": 0.8661501407623291, + "loss_ce": 0.0004274618113413453, + "loss_iou": 0.412109375, + "loss_num": 0.0081787109375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 343580252, + "step": 2720 + }, + { + "epoch": 0.6980055152953248, + "grad_norm": 29.01540756225586, + "learning_rate": 5e-06, + "loss": 0.9604, + "num_input_tokens_seen": 343707200, + "step": 2721 + }, + { + "epoch": 0.6980055152953248, + "loss": 1.0122689008712769, + "loss_ce": 0.003479864913970232, + "loss_iou": 0.43359375, + "loss_num": 0.0281982421875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 343707200, + "step": 2721 + }, + { + "epoch": 0.6982620406592702, + "grad_norm": 27.238037109375, + "learning_rate": 5e-06, + "loss": 0.9249, + "num_input_tokens_seen": 343832400, + "step": 2722 + }, + { + "epoch": 0.6982620406592702, + "loss": 1.0170339345932007, + "loss_ce": 0.0001882256183307618, + "loss_iou": 0.462890625, + "loss_num": 0.01806640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 343832400, + "step": 2722 + }, + { + "epoch": 0.6985185660232155, + "grad_norm": 46.13434600830078, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 343958812, + "step": 2723 + }, + { + "epoch": 0.6985185660232155, + "loss": 0.9416297674179077, + "loss_ce": 0.00022354410612024367, + "loss_iou": 0.431640625, + "loss_num": 0.0155029296875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 343958812, + "step": 2723 + }, + { + "epoch": 0.6987750913871609, + "grad_norm": 48.49302673339844, + "learning_rate": 5e-06, + "loss": 0.9929, + "num_input_tokens_seen": 344085444, + "step": 2724 + }, + { + "epoch": 0.6987750913871609, + "loss": 0.9323444366455078, + "loss_ce": 0.0007038245094008744, + "loss_iou": 0.423828125, + "loss_num": 0.016845703125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 344085444, + "step": 2724 + }, + { + "epoch": 0.6990316167511063, + "grad_norm": 36.19751739501953, + "learning_rate": 5e-06, + "loss": 0.9032, + "num_input_tokens_seen": 344209756, + "step": 2725 + }, + { + "epoch": 0.6990316167511063, + "loss": 1.071138620376587, + "loss_ce": 0.003755764337256551, + "loss_iou": 0.4921875, + "loss_num": 0.0164794921875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 344209756, + "step": 2725 + }, + { + "epoch": 0.6992881421150516, + "grad_norm": 49.690773010253906, + "learning_rate": 5e-06, + "loss": 0.9939, + "num_input_tokens_seen": 344335968, + "step": 2726 + }, + { + "epoch": 0.6992881421150516, + "loss": 0.7603789567947388, + "loss_ce": 0.0001250756613444537, + "loss_iou": 0.36328125, + "loss_num": 0.006317138671875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 344335968, + "step": 2726 + }, + { + "epoch": 0.699544667478997, + "grad_norm": 42.81621170043945, + "learning_rate": 5e-06, + "loss": 0.9928, + "num_input_tokens_seen": 344460832, + "step": 2727 + }, + { + "epoch": 0.699544667478997, + "loss": 0.8613684177398682, + "loss_ce": 0.0007726994226686656, + "loss_iou": 0.404296875, + "loss_num": 0.0101318359375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 344460832, + "step": 2727 + }, + { + "epoch": 0.6998011928429424, + "grad_norm": 38.60797119140625, + "learning_rate": 5e-06, + "loss": 0.9436, + "num_input_tokens_seen": 344586736, + "step": 2728 + }, + { + "epoch": 0.6998011928429424, + "loss": 1.0093849897384644, + "loss_ce": 0.0010842228075489402, + "loss_iou": 0.46484375, + "loss_num": 0.01556396484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 344586736, + "step": 2728 + }, + { + "epoch": 0.7000577182068877, + "grad_norm": 48.785133361816406, + "learning_rate": 5e-06, + "loss": 1.0519, + "num_input_tokens_seen": 344714164, + "step": 2729 + }, + { + "epoch": 0.7000577182068877, + "loss": 1.0924474000930786, + "loss_ce": 0.001382911577820778, + "loss_iou": 0.48046875, + "loss_num": 0.0260009765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 344714164, + "step": 2729 + }, + { + "epoch": 0.700314243570833, + "grad_norm": 51.45332717895508, + "learning_rate": 5e-06, + "loss": 0.9612, + "num_input_tokens_seen": 344839424, + "step": 2730 + }, + { + "epoch": 0.700314243570833, + "loss": 1.1332001686096191, + "loss_ce": 0.000875901139806956, + "loss_iou": 0.51953125, + "loss_num": 0.0177001953125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 344839424, + "step": 2730 + }, + { + "epoch": 0.7005707689347784, + "grad_norm": 56.605979919433594, + "learning_rate": 5e-06, + "loss": 0.9612, + "num_input_tokens_seen": 344966432, + "step": 2731 + }, + { + "epoch": 0.7005707689347784, + "loss": 0.680854082107544, + "loss_ce": 0.0006783228600397706, + "loss_iou": 0.32421875, + "loss_num": 0.006500244140625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 344966432, + "step": 2731 + }, + { + "epoch": 0.7008272942987238, + "grad_norm": 59.15030288696289, + "learning_rate": 5e-06, + "loss": 1.0101, + "num_input_tokens_seen": 345092688, + "step": 2732 + }, + { + "epoch": 0.7008272942987238, + "loss": 1.1291640996932983, + "loss_ce": 0.001234367024153471, + "loss_iou": 0.5, + "loss_num": 0.025634765625, + "loss_xval": 1.125, + "num_input_tokens_seen": 345092688, + "step": 2732 + }, + { + "epoch": 0.7010838196626692, + "grad_norm": 54.446998596191406, + "learning_rate": 5e-06, + "loss": 1.0027, + "num_input_tokens_seen": 345218600, + "step": 2733 + }, + { + "epoch": 0.7010838196626692, + "loss": 0.974226713180542, + "loss_ce": 0.0010821908945217729, + "loss_iou": 0.453125, + "loss_num": 0.01373291015625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 345218600, + "step": 2733 + }, + { + "epoch": 0.7013403450266145, + "grad_norm": 47.244693756103516, + "learning_rate": 5e-06, + "loss": 0.9379, + "num_input_tokens_seen": 345344412, + "step": 2734 + }, + { + "epoch": 0.7013403450266145, + "loss": 0.7605173587799072, + "loss_ce": 0.00612280610948801, + "loss_iou": 0.3359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 345344412, + "step": 2734 + }, + { + "epoch": 0.7015968703905598, + "grad_norm": 52.274749755859375, + "learning_rate": 5e-06, + "loss": 0.9242, + "num_input_tokens_seen": 345471132, + "step": 2735 + }, + { + "epoch": 0.7015968703905598, + "loss": 0.9396077394485474, + "loss_ce": 0.0011311790440231562, + "loss_iou": 0.44140625, + "loss_num": 0.010986328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 345471132, + "step": 2735 + }, + { + "epoch": 0.7018533957545052, + "grad_norm": 52.662445068359375, + "learning_rate": 5e-06, + "loss": 0.8902, + "num_input_tokens_seen": 345597072, + "step": 2736 + }, + { + "epoch": 0.7018533957545052, + "loss": 0.9931719899177551, + "loss_ce": 0.001961018657311797, + "loss_iou": 0.466796875, + "loss_num": 0.01123046875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 345597072, + "step": 2736 + }, + { + "epoch": 0.7021099211184506, + "grad_norm": 43.66284942626953, + "learning_rate": 5e-06, + "loss": 0.8943, + "num_input_tokens_seen": 345722988, + "step": 2737 + }, + { + "epoch": 0.7021099211184506, + "loss": 0.9050257205963135, + "loss_ce": 0.0004846805240958929, + "loss_iou": 0.423828125, + "loss_num": 0.01129150390625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 345722988, + "step": 2737 + }, + { + "epoch": 0.702366446482396, + "grad_norm": 40.173011779785156, + "learning_rate": 5e-06, + "loss": 1.0511, + "num_input_tokens_seen": 345848864, + "step": 2738 + }, + { + "epoch": 0.702366446482396, + "loss": 1.1606736183166504, + "loss_ce": 0.0014939035754650831, + "loss_iou": 0.53125, + "loss_num": 0.020263671875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 345848864, + "step": 2738 + }, + { + "epoch": 0.7026229718463413, + "grad_norm": 44.485721588134766, + "learning_rate": 5e-06, + "loss": 0.9568, + "num_input_tokens_seen": 345975356, + "step": 2739 + }, + { + "epoch": 0.7026229718463413, + "loss": 0.9853274822235107, + "loss_ce": 0.0029056090861558914, + "loss_iou": 0.45703125, + "loss_num": 0.01336669921875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 345975356, + "step": 2739 + }, + { + "epoch": 0.7028794972102866, + "grad_norm": 47.67707061767578, + "learning_rate": 5e-06, + "loss": 0.9212, + "num_input_tokens_seen": 346101964, + "step": 2740 + }, + { + "epoch": 0.7028794972102866, + "loss": 0.9242393374443054, + "loss_ce": 0.00041123118717223406, + "loss_iou": 0.43359375, + "loss_num": 0.0111083984375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 346101964, + "step": 2740 + }, + { + "epoch": 0.703136022574232, + "grad_norm": 59.65712356567383, + "learning_rate": 5e-06, + "loss": 1.0614, + "num_input_tokens_seen": 346228932, + "step": 2741 + }, + { + "epoch": 0.703136022574232, + "loss": 1.0530009269714355, + "loss_ce": 0.003196177538484335, + "loss_iou": 0.482421875, + "loss_num": 0.01708984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 346228932, + "step": 2741 + }, + { + "epoch": 0.7033925479381774, + "grad_norm": 73.4288558959961, + "learning_rate": 5e-06, + "loss": 0.9609, + "num_input_tokens_seen": 346354428, + "step": 2742 + }, + { + "epoch": 0.7033925479381774, + "loss": 0.8972429633140564, + "loss_ce": 0.00027029725606553257, + "loss_iou": 0.41796875, + "loss_num": 0.01239013671875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 346354428, + "step": 2742 + }, + { + "epoch": 0.7036490733021228, + "grad_norm": 52.2337646484375, + "learning_rate": 5e-06, + "loss": 1.0075, + "num_input_tokens_seen": 346481648, + "step": 2743 + }, + { + "epoch": 0.7036490733021228, + "loss": 0.996415376663208, + "loss_ce": 0.000321689760312438, + "loss_iou": 0.466796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 346481648, + "step": 2743 + }, + { + "epoch": 0.703905598666068, + "grad_norm": 70.82198333740234, + "learning_rate": 5e-06, + "loss": 0.8422, + "num_input_tokens_seen": 346608712, + "step": 2744 + }, + { + "epoch": 0.703905598666068, + "loss": 0.8751461505889893, + "loss_ce": 0.0006344427238218486, + "loss_iou": 0.4140625, + "loss_num": 0.009765625, + "loss_xval": 0.875, + "num_input_tokens_seen": 346608712, + "step": 2744 + }, + { + "epoch": 0.7041621240300134, + "grad_norm": 47.04059982299805, + "learning_rate": 5e-06, + "loss": 0.9012, + "num_input_tokens_seen": 346734952, + "step": 2745 + }, + { + "epoch": 0.7041621240300134, + "loss": 0.8897431492805481, + "loss_ce": 0.0005829878500662744, + "loss_iou": 0.408203125, + "loss_num": 0.014404296875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 346734952, + "step": 2745 + }, + { + "epoch": 0.7044186493939588, + "grad_norm": 38.32073211669922, + "learning_rate": 5e-06, + "loss": 0.9106, + "num_input_tokens_seen": 346861996, + "step": 2746 + }, + { + "epoch": 0.7044186493939588, + "loss": 0.9947875142097473, + "loss_ce": 0.0006469347281381488, + "loss_iou": 0.4609375, + "loss_num": 0.0146484375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 346861996, + "step": 2746 + }, + { + "epoch": 0.7046751747579042, + "grad_norm": 42.436500549316406, + "learning_rate": 5e-06, + "loss": 0.9792, + "num_input_tokens_seen": 346988088, + "step": 2747 + }, + { + "epoch": 0.7046751747579042, + "loss": 1.012819766998291, + "loss_ce": 0.0011009767185896635, + "loss_iou": 0.470703125, + "loss_num": 0.014404296875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 346988088, + "step": 2747 + }, + { + "epoch": 0.7049317001218496, + "grad_norm": 55.620479583740234, + "learning_rate": 5e-06, + "loss": 0.9742, + "num_input_tokens_seen": 347115828, + "step": 2748 + }, + { + "epoch": 0.7049317001218496, + "loss": 0.902011513710022, + "loss_ce": 0.00015605230873916298, + "loss_iou": 0.421875, + "loss_num": 0.0120849609375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 347115828, + "step": 2748 + }, + { + "epoch": 0.705188225485795, + "grad_norm": 57.51042175292969, + "learning_rate": 5e-06, + "loss": 0.9742, + "num_input_tokens_seen": 347242860, + "step": 2749 + }, + { + "epoch": 0.705188225485795, + "loss": 0.9435006380081177, + "loss_ce": 0.00404755724593997, + "loss_iou": 0.44140625, + "loss_num": 0.0115966796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 347242860, + "step": 2749 + }, + { + "epoch": 0.7054447508497402, + "grad_norm": 52.022430419921875, + "learning_rate": 5e-06, + "loss": 1.1115, + "num_input_tokens_seen": 347370100, + "step": 2750 + }, + { + "epoch": 0.7054447508497402, + "eval_icons_CIoU": 0.18221151363104582, + "eval_icons_GIoU": 0.1368772443383932, + "eval_icons_IoU": 0.38818876445293427, + "eval_icons_MAE_all": 0.030386446975171566, + "eval_icons_MAE_h": 0.049249306321144104, + "eval_icons_MAE_w": 0.05771993286907673, + "eval_icons_MAE_x_boxes": 0.05737135373055935, + "eval_icons_MAE_y_boxes": 0.0471673384308815, + "eval_icons_NUM_probability": 0.9998241066932678, + "eval_icons_inside_bbox": 0.6840277910232544, + "eval_icons_loss": 1.771482229232788, + "eval_icons_loss_ce": 0.00019089464331045747, + "eval_icons_loss_iou": 0.7828369140625, + "eval_icons_loss_num": 0.02957916259765625, + "eval_icons_loss_xval": 1.713623046875, + "eval_icons_runtime": 45.2576, + "eval_icons_samples_per_second": 1.105, + "eval_icons_steps_per_second": 0.044, + "num_input_tokens_seen": 347370100, + "step": 2750 + }, + { + "epoch": 0.7054447508497402, + "eval_screenspot_CIoU": 0.10589539259672165, + "eval_screenspot_GIoU": 0.09083433945973714, + "eval_screenspot_IoU": 0.27689432601133984, + "eval_screenspot_MAE_all": 0.0770096021393935, + "eval_screenspot_MAE_h": 0.0668790986140569, + "eval_screenspot_MAE_w": 0.12235869218905766, + "eval_screenspot_MAE_x_boxes": 0.1048525075117747, + "eval_screenspot_MAE_y_boxes": 0.054055714358886085, + "eval_screenspot_NUM_probability": 0.9999255339304606, + "eval_screenspot_inside_bbox": 0.5674999952316284, + "eval_screenspot_loss": 2.2556326389312744, + "eval_screenspot_loss_ce": 0.0011050066289802392, + "eval_screenspot_loss_iou": 0.9358723958333334, + "eval_screenspot_loss_num": 0.0831298828125, + "eval_screenspot_loss_xval": 2.2884114583333335, + "eval_screenspot_runtime": 78.1765, + "eval_screenspot_samples_per_second": 1.138, + "eval_screenspot_steps_per_second": 0.038, + "num_input_tokens_seen": 347370100, + "step": 2750 + }, + { + "epoch": 0.7054447508497402, + "loss": 2.2458667755126953, + "loss_ce": 0.0007495533209294081, + "loss_iou": 0.93359375, + "loss_num": 0.0751953125, + "loss_xval": 2.25, + "num_input_tokens_seen": 347370100, + "step": 2750 + }, + { + "epoch": 0.7057012762136856, + "grad_norm": 56.50239562988281, + "learning_rate": 5e-06, + "loss": 1.0143, + "num_input_tokens_seen": 347495844, + "step": 2751 + }, + { + "epoch": 0.7057012762136856, + "loss": 1.1214866638183594, + "loss_ce": 0.0028343352023512125, + "loss_iou": 0.5078125, + "loss_num": 0.0201416015625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 347495844, + "step": 2751 + }, + { + "epoch": 0.705957801577631, + "grad_norm": 33.508575439453125, + "learning_rate": 5e-06, + "loss": 1.009, + "num_input_tokens_seen": 347621700, + "step": 2752 + }, + { + "epoch": 0.705957801577631, + "loss": 0.8885220289230347, + "loss_ce": 0.0030239580664783716, + "loss_iou": 0.388671875, + "loss_num": 0.021240234375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 347621700, + "step": 2752 + }, + { + "epoch": 0.7062143269415764, + "grad_norm": 24.315092086791992, + "learning_rate": 5e-06, + "loss": 0.8856, + "num_input_tokens_seen": 347748220, + "step": 2753 + }, + { + "epoch": 0.7062143269415764, + "loss": 1.0310553312301636, + "loss_ce": 0.00029359152540564537, + "loss_iou": 0.48046875, + "loss_num": 0.01409912109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 347748220, + "step": 2753 + }, + { + "epoch": 0.7064708523055218, + "grad_norm": 24.774351119995117, + "learning_rate": 5e-06, + "loss": 1.0362, + "num_input_tokens_seen": 347874724, + "step": 2754 + }, + { + "epoch": 0.7064708523055218, + "loss": 0.9249798059463501, + "loss_ce": 0.0011517030652612448, + "loss_iou": 0.4296875, + "loss_num": 0.01263427734375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 347874724, + "step": 2754 + }, + { + "epoch": 0.706727377669467, + "grad_norm": 23.18147850036621, + "learning_rate": 5e-06, + "loss": 0.8126, + "num_input_tokens_seen": 348001268, + "step": 2755 + }, + { + "epoch": 0.706727377669467, + "loss": 0.8833861351013184, + "loss_ce": 0.0049681575037539005, + "loss_iou": 0.400390625, + "loss_num": 0.015380859375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 348001268, + "step": 2755 + }, + { + "epoch": 0.7069839030334124, + "grad_norm": 33.20205307006836, + "learning_rate": 5e-06, + "loss": 0.9715, + "num_input_tokens_seen": 348127100, + "step": 2756 + }, + { + "epoch": 0.7069839030334124, + "loss": 0.9705176949501038, + "loss_ce": 0.0012794225476682186, + "loss_iou": 0.44921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 348127100, + "step": 2756 + }, + { + "epoch": 0.7072404283973578, + "grad_norm": 39.273651123046875, + "learning_rate": 5e-06, + "loss": 0.845, + "num_input_tokens_seen": 348253460, + "step": 2757 + }, + { + "epoch": 0.7072404283973578, + "loss": 0.881648063659668, + "loss_ce": 0.0015211014542728662, + "loss_iou": 0.40234375, + "loss_num": 0.01483154296875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 348253460, + "step": 2757 + }, + { + "epoch": 0.7074969537613032, + "grad_norm": 43.06827926635742, + "learning_rate": 5e-06, + "loss": 0.8867, + "num_input_tokens_seen": 348378712, + "step": 2758 + }, + { + "epoch": 0.7074969537613032, + "loss": 0.9092092514038086, + "loss_ce": 0.0005179053987376392, + "loss_iou": 0.427734375, + "loss_num": 0.0106201171875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 348378712, + "step": 2758 + }, + { + "epoch": 0.7077534791252486, + "grad_norm": 54.80629348754883, + "learning_rate": 5e-06, + "loss": 1.0831, + "num_input_tokens_seen": 348503928, + "step": 2759 + }, + { + "epoch": 0.7077534791252486, + "loss": 0.9923757314682007, + "loss_ce": 0.004094492178410292, + "loss_iou": 0.43359375, + "loss_num": 0.02392578125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 348503928, + "step": 2759 + }, + { + "epoch": 0.7080100044891938, + "grad_norm": 61.461116790771484, + "learning_rate": 5e-06, + "loss": 0.947, + "num_input_tokens_seen": 348631084, + "step": 2760 + }, + { + "epoch": 0.7080100044891938, + "loss": 0.9746453762054443, + "loss_ce": 0.0021112486720085144, + "loss_iou": 0.443359375, + "loss_num": 0.01708984375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 348631084, + "step": 2760 + }, + { + "epoch": 0.7082665298531392, + "grad_norm": 60.19892883300781, + "learning_rate": 5e-06, + "loss": 0.8887, + "num_input_tokens_seen": 348757784, + "step": 2761 + }, + { + "epoch": 0.7082665298531392, + "loss": 0.7793081402778625, + "loss_ce": 0.0014761117054149508, + "loss_iou": 0.369140625, + "loss_num": 0.0079345703125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 348757784, + "step": 2761 + }, + { + "epoch": 0.7085230552170846, + "grad_norm": 53.89301300048828, + "learning_rate": 5e-06, + "loss": 0.8104, + "num_input_tokens_seen": 348884572, + "step": 2762 + }, + { + "epoch": 0.7085230552170846, + "loss": 0.8208542466163635, + "loss_ce": 0.0002975719980895519, + "loss_iou": 0.388671875, + "loss_num": 0.00897216796875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 348884572, + "step": 2762 + }, + { + "epoch": 0.70877958058103, + "grad_norm": 50.10633850097656, + "learning_rate": 5e-06, + "loss": 1.0352, + "num_input_tokens_seen": 349009984, + "step": 2763 + }, + { + "epoch": 0.70877958058103, + "loss": 1.0226970911026, + "loss_ce": 0.0026775544974952936, + "loss_iou": 0.474609375, + "loss_num": 0.01422119140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 349009984, + "step": 2763 + }, + { + "epoch": 0.7090361059449753, + "grad_norm": 34.07736587524414, + "learning_rate": 5e-06, + "loss": 0.9485, + "num_input_tokens_seen": 349137032, + "step": 2764 + }, + { + "epoch": 0.7090361059449753, + "loss": 0.9381955862045288, + "loss_ce": 0.00020727534138131887, + "loss_iou": 0.42578125, + "loss_num": 0.0167236328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 349137032, + "step": 2764 + }, + { + "epoch": 0.7092926313089206, + "grad_norm": 83.81598663330078, + "learning_rate": 5e-06, + "loss": 0.8864, + "num_input_tokens_seen": 349263400, + "step": 2765 + }, + { + "epoch": 0.7092926313089206, + "loss": 0.9453906416893005, + "loss_ce": 0.0015429839259013534, + "loss_iou": 0.43359375, + "loss_num": 0.015625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 349263400, + "step": 2765 + }, + { + "epoch": 0.709549156672866, + "grad_norm": 65.54422760009766, + "learning_rate": 5e-06, + "loss": 0.9968, + "num_input_tokens_seen": 349389204, + "step": 2766 + }, + { + "epoch": 0.709549156672866, + "loss": 0.8402853012084961, + "loss_ce": 0.0004415850853547454, + "loss_iou": 0.400390625, + "loss_num": 0.00738525390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 349389204, + "step": 2766 + }, + { + "epoch": 0.7098056820368114, + "grad_norm": 50.178218841552734, + "learning_rate": 5e-06, + "loss": 0.9494, + "num_input_tokens_seen": 349515204, + "step": 2767 + }, + { + "epoch": 0.7098056820368114, + "loss": 1.1371309757232666, + "loss_ce": 0.0004122306127101183, + "loss_iou": 0.5078125, + "loss_num": 0.023681640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 349515204, + "step": 2767 + }, + { + "epoch": 0.7100622074007568, + "grad_norm": 194.0366973876953, + "learning_rate": 5e-06, + "loss": 1.025, + "num_input_tokens_seen": 349642856, + "step": 2768 + }, + { + "epoch": 0.7100622074007568, + "loss": 0.981565535068512, + "loss_ce": 0.005491356831043959, + "loss_iou": 0.44921875, + "loss_num": 0.0155029296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 349642856, + "step": 2768 + }, + { + "epoch": 0.7103187327647021, + "grad_norm": 43.64764404296875, + "learning_rate": 5e-06, + "loss": 0.9401, + "num_input_tokens_seen": 349769116, + "step": 2769 + }, + { + "epoch": 0.7103187327647021, + "loss": 0.9813637137413025, + "loss_ce": 0.008463321253657341, + "loss_iou": 0.43359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 349769116, + "step": 2769 + }, + { + "epoch": 0.7105752581286475, + "grad_norm": 53.964969635009766, + "learning_rate": 5e-06, + "loss": 0.9251, + "num_input_tokens_seen": 349896016, + "step": 2770 + }, + { + "epoch": 0.7105752581286475, + "loss": 0.9335019588470459, + "loss_ce": 0.0003965099749621004, + "loss_iou": 0.439453125, + "loss_num": 0.01055908203125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 349896016, + "step": 2770 + }, + { + "epoch": 0.7108317834925928, + "grad_norm": 74.40316772460938, + "learning_rate": 5e-06, + "loss": 1.0022, + "num_input_tokens_seen": 350023356, + "step": 2771 + }, + { + "epoch": 0.7108317834925928, + "loss": 0.9753098487854004, + "loss_ce": 0.00021217142057139426, + "loss_iou": 0.453125, + "loss_num": 0.01416015625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 350023356, + "step": 2771 + }, + { + "epoch": 0.7110883088565382, + "grad_norm": 44.26226043701172, + "learning_rate": 5e-06, + "loss": 1.1925, + "num_input_tokens_seen": 350149440, + "step": 2772 + }, + { + "epoch": 0.7110883088565382, + "loss": 1.3607394695281982, + "loss_ce": 0.004782530479133129, + "loss_iou": 0.61328125, + "loss_num": 0.0262451171875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 350149440, + "step": 2772 + }, + { + "epoch": 0.7113448342204836, + "grad_norm": 35.82310485839844, + "learning_rate": 5e-06, + "loss": 0.9185, + "num_input_tokens_seen": 350274996, + "step": 2773 + }, + { + "epoch": 0.7113448342204836, + "loss": 1.07904052734375, + "loss_ce": 0.0009155577281489968, + "loss_iou": 0.486328125, + "loss_num": 0.0208740234375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 350274996, + "step": 2773 + }, + { + "epoch": 0.7116013595844289, + "grad_norm": 51.07183837890625, + "learning_rate": 5e-06, + "loss": 0.9482, + "num_input_tokens_seen": 350402640, + "step": 2774 + }, + { + "epoch": 0.7116013595844289, + "loss": 1.0929245948791504, + "loss_ce": 0.0006394553347490728, + "loss_iou": 0.490234375, + "loss_num": 0.021728515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 350402640, + "step": 2774 + }, + { + "epoch": 0.7118578849483743, + "grad_norm": 43.68464660644531, + "learning_rate": 5e-06, + "loss": 1.0508, + "num_input_tokens_seen": 350529320, + "step": 2775 + }, + { + "epoch": 0.7118578849483743, + "loss": 1.1623339653015137, + "loss_ce": 0.0031542929355055094, + "loss_iou": 0.52734375, + "loss_num": 0.02099609375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 350529320, + "step": 2775 + }, + { + "epoch": 0.7121144103123196, + "grad_norm": 50.21672439575195, + "learning_rate": 5e-06, + "loss": 0.9472, + "num_input_tokens_seen": 350656784, + "step": 2776 + }, + { + "epoch": 0.7121144103123196, + "loss": 0.960762619972229, + "loss_ce": 0.0008017014479264617, + "loss_iou": 0.453125, + "loss_num": 0.01043701171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 350656784, + "step": 2776 + }, + { + "epoch": 0.712370935676265, + "grad_norm": 50.02055358886719, + "learning_rate": 5e-06, + "loss": 1.1525, + "num_input_tokens_seen": 350783280, + "step": 2777 + }, + { + "epoch": 0.712370935676265, + "loss": 1.1245322227478027, + "loss_ce": 0.0029502154793590307, + "loss_iou": 0.51953125, + "loss_num": 0.01708984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 350783280, + "step": 2777 + }, + { + "epoch": 0.7126274610402104, + "grad_norm": 42.00356674194336, + "learning_rate": 5e-06, + "loss": 0.8743, + "num_input_tokens_seen": 350909456, + "step": 2778 + }, + { + "epoch": 0.7126274610402104, + "loss": 0.8946172595024109, + "loss_ce": 0.0010625626891851425, + "loss_iou": 0.4140625, + "loss_num": 0.012939453125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 350909456, + "step": 2778 + }, + { + "epoch": 0.7128839864041557, + "grad_norm": 53.19773483276367, + "learning_rate": 5e-06, + "loss": 0.9403, + "num_input_tokens_seen": 351035720, + "step": 2779 + }, + { + "epoch": 0.7128839864041557, + "loss": 0.9870266914367676, + "loss_ce": 0.0006985502550378442, + "loss_iou": 0.46484375, + "loss_num": 0.01153564453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 351035720, + "step": 2779 + }, + { + "epoch": 0.7131405117681011, + "grad_norm": 72.55758666992188, + "learning_rate": 5e-06, + "loss": 0.838, + "num_input_tokens_seen": 351162636, + "step": 2780 + }, + { + "epoch": 0.7131405117681011, + "loss": 0.8825046420097351, + "loss_ce": 0.000912790244910866, + "loss_iou": 0.41796875, + "loss_num": 0.009033203125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 351162636, + "step": 2780 + }, + { + "epoch": 0.7133970371320464, + "grad_norm": 44.77214431762695, + "learning_rate": 5e-06, + "loss": 1.0648, + "num_input_tokens_seen": 351288584, + "step": 2781 + }, + { + "epoch": 0.7133970371320464, + "loss": 0.9679480791091919, + "loss_ce": 0.000662887585349381, + "loss_iou": 0.4609375, + "loss_num": 0.00927734375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 351288584, + "step": 2781 + }, + { + "epoch": 0.7136535624959918, + "grad_norm": 22.387264251708984, + "learning_rate": 5e-06, + "loss": 0.985, + "num_input_tokens_seen": 351414900, + "step": 2782 + }, + { + "epoch": 0.7136535624959918, + "loss": 0.9609547853469849, + "loss_ce": 0.000993857509456575, + "loss_iou": 0.44140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 351414900, + "step": 2782 + }, + { + "epoch": 0.7139100878599371, + "grad_norm": 35.68178939819336, + "learning_rate": 5e-06, + "loss": 0.9186, + "num_input_tokens_seen": 351541616, + "step": 2783 + }, + { + "epoch": 0.7139100878599371, + "loss": 0.8436436653137207, + "loss_ce": 0.0016026501543819904, + "loss_iou": 0.37890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 351541616, + "step": 2783 + }, + { + "epoch": 0.7141666132238825, + "grad_norm": 41.338600158691406, + "learning_rate": 5e-06, + "loss": 0.9077, + "num_input_tokens_seen": 351670516, + "step": 2784 + }, + { + "epoch": 0.7141666132238825, + "loss": 0.703155517578125, + "loss_ce": 0.0024719128850847483, + "loss_iou": 0.330078125, + "loss_num": 0.00836181640625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 351670516, + "step": 2784 + }, + { + "epoch": 0.7144231385878279, + "grad_norm": 45.5844841003418, + "learning_rate": 5e-06, + "loss": 0.9989, + "num_input_tokens_seen": 351796880, + "step": 2785 + }, + { + "epoch": 0.7144231385878279, + "loss": 1.1670355796813965, + "loss_ce": 0.0005317461909726262, + "loss_iou": 0.5234375, + "loss_num": 0.0245361328125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 351796880, + "step": 2785 + }, + { + "epoch": 0.7146796639517732, + "grad_norm": 38.859500885009766, + "learning_rate": 5e-06, + "loss": 0.8286, + "num_input_tokens_seen": 351923828, + "step": 2786 + }, + { + "epoch": 0.7146796639517732, + "loss": 0.8668532371520996, + "loss_ce": 0.000642305938526988, + "loss_iou": 0.396484375, + "loss_num": 0.0147705078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 351923828, + "step": 2786 + }, + { + "epoch": 0.7149361893157186, + "grad_norm": 50.20237731933594, + "learning_rate": 5e-06, + "loss": 0.9814, + "num_input_tokens_seen": 352049704, + "step": 2787 + }, + { + "epoch": 0.7149361893157186, + "loss": 0.8779653310775757, + "loss_ce": 0.0010121985105797648, + "loss_iou": 0.416015625, + "loss_num": 0.00921630859375, + "loss_xval": 0.875, + "num_input_tokens_seen": 352049704, + "step": 2787 + }, + { + "epoch": 0.7151927146796639, + "grad_norm": 37.611289978027344, + "learning_rate": 5e-06, + "loss": 0.9453, + "num_input_tokens_seen": 352175568, + "step": 2788 + }, + { + "epoch": 0.7151927146796639, + "loss": 0.9001970291137695, + "loss_ce": 0.0005388229619711637, + "loss_iou": 0.40625, + "loss_num": 0.0177001953125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 352175568, + "step": 2788 + }, + { + "epoch": 0.7154492400436093, + "grad_norm": 47.84310531616211, + "learning_rate": 5e-06, + "loss": 0.9853, + "num_input_tokens_seen": 352302844, + "step": 2789 + }, + { + "epoch": 0.7154492400436093, + "loss": 0.9561954140663147, + "loss_ce": 0.002582116983830929, + "loss_iou": 0.4375, + "loss_num": 0.0157470703125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 352302844, + "step": 2789 + }, + { + "epoch": 0.7157057654075547, + "grad_norm": 45.53949737548828, + "learning_rate": 5e-06, + "loss": 0.8535, + "num_input_tokens_seen": 352428584, + "step": 2790 + }, + { + "epoch": 0.7157057654075547, + "loss": 0.9420002698898315, + "loss_ce": 0.0005940085975453258, + "loss_iou": 0.412109375, + "loss_num": 0.0235595703125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 352428584, + "step": 2790 + }, + { + "epoch": 0.7159622907715001, + "grad_norm": 68.96114349365234, + "learning_rate": 5e-06, + "loss": 0.959, + "num_input_tokens_seen": 352556272, + "step": 2791 + }, + { + "epoch": 0.7159622907715001, + "loss": 0.9124839305877686, + "loss_ce": 0.000862844055518508, + "loss_iou": 0.419921875, + "loss_num": 0.0145263671875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 352556272, + "step": 2791 + }, + { + "epoch": 0.7162188161354454, + "grad_norm": 44.24248123168945, + "learning_rate": 5e-06, + "loss": 1.0669, + "num_input_tokens_seen": 352680992, + "step": 2792 + }, + { + "epoch": 0.7162188161354454, + "loss": 1.0508984327316284, + "loss_ce": 0.00011717645975295454, + "loss_iou": 0.4921875, + "loss_num": 0.0133056640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 352680992, + "step": 2792 + }, + { + "epoch": 0.7164753414993907, + "grad_norm": 48.47018814086914, + "learning_rate": 5e-06, + "loss": 0.9928, + "num_input_tokens_seen": 352807984, + "step": 2793 + }, + { + "epoch": 0.7164753414993907, + "loss": 0.9425259232521057, + "loss_ce": 0.00014312152052298188, + "loss_iou": 0.439453125, + "loss_num": 0.01300048828125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 352807984, + "step": 2793 + }, + { + "epoch": 0.7167318668633361, + "grad_norm": 48.998779296875, + "learning_rate": 5e-06, + "loss": 1.0337, + "num_input_tokens_seen": 352933992, + "step": 2794 + }, + { + "epoch": 0.7167318668633361, + "loss": 0.8462742567062378, + "loss_ce": 0.0005711799603886902, + "loss_iou": 0.3984375, + "loss_num": 0.00927734375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 352933992, + "step": 2794 + }, + { + "epoch": 0.7169883922272815, + "grad_norm": 32.27525329589844, + "learning_rate": 5e-06, + "loss": 0.9177, + "num_input_tokens_seen": 353060692, + "step": 2795 + }, + { + "epoch": 0.7169883922272815, + "loss": 0.9208483695983887, + "loss_ce": 0.0006823799922131002, + "loss_iou": 0.431640625, + "loss_num": 0.01123046875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 353060692, + "step": 2795 + }, + { + "epoch": 0.7172449175912269, + "grad_norm": 70.03369140625, + "learning_rate": 5e-06, + "loss": 1.0554, + "num_input_tokens_seen": 353187684, + "step": 2796 + }, + { + "epoch": 0.7172449175912269, + "loss": 1.1553441286087036, + "loss_ce": 0.001535541843622923, + "loss_iou": 0.515625, + "loss_num": 0.0235595703125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 353187684, + "step": 2796 + }, + { + "epoch": 0.7175014429551722, + "grad_norm": 44.487857818603516, + "learning_rate": 5e-06, + "loss": 0.9593, + "num_input_tokens_seen": 353313620, + "step": 2797 + }, + { + "epoch": 0.7175014429551722, + "loss": 0.9535285234451294, + "loss_ce": 0.0008917720406316221, + "loss_iou": 0.453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 353313620, + "step": 2797 + }, + { + "epoch": 0.7177579683191175, + "grad_norm": 19.193157196044922, + "learning_rate": 5e-06, + "loss": 0.8899, + "num_input_tokens_seen": 353439896, + "step": 2798 + }, + { + "epoch": 0.7177579683191175, + "loss": 0.9076510071754456, + "loss_ce": 0.001889252569526434, + "loss_iou": 0.4140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 353439896, + "step": 2798 + }, + { + "epoch": 0.7180144936830629, + "grad_norm": 30.17259979248047, + "learning_rate": 5e-06, + "loss": 0.8296, + "num_input_tokens_seen": 353566496, + "step": 2799 + }, + { + "epoch": 0.7180144936830629, + "loss": 0.9245446920394897, + "loss_ce": 0.0007165212882682681, + "loss_iou": 0.42578125, + "loss_num": 0.014404296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 353566496, + "step": 2799 + }, + { + "epoch": 0.7182710190470083, + "grad_norm": 73.41886901855469, + "learning_rate": 5e-06, + "loss": 0.9407, + "num_input_tokens_seen": 353692728, + "step": 2800 + }, + { + "epoch": 0.7182710190470083, + "loss": 0.8926745057106018, + "loss_ce": 0.0010729453060775995, + "loss_iou": 0.40234375, + "loss_num": 0.017822265625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 353692728, + "step": 2800 + }, + { + "epoch": 0.7185275444109537, + "grad_norm": 54.74946212768555, + "learning_rate": 5e-06, + "loss": 1.0948, + "num_input_tokens_seen": 353819004, + "step": 2801 + }, + { + "epoch": 0.7185275444109537, + "loss": 1.0816195011138916, + "loss_ce": 0.002029532566666603, + "loss_iou": 0.490234375, + "loss_num": 0.0198974609375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 353819004, + "step": 2801 + }, + { + "epoch": 0.718784069774899, + "grad_norm": 21.141510009765625, + "learning_rate": 5e-06, + "loss": 0.9458, + "num_input_tokens_seen": 353945880, + "step": 2802 + }, + { + "epoch": 0.718784069774899, + "loss": 0.906072199344635, + "loss_ce": 0.000310474366415292, + "loss_iou": 0.41796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 353945880, + "step": 2802 + }, + { + "epoch": 0.7190405951388443, + "grad_norm": 30.580053329467773, + "learning_rate": 5e-06, + "loss": 0.8545, + "num_input_tokens_seen": 354072260, + "step": 2803 + }, + { + "epoch": 0.7190405951388443, + "loss": 0.8698553442955017, + "loss_ce": 0.0002264417998958379, + "loss_iou": 0.419921875, + "loss_num": 0.006317138671875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 354072260, + "step": 2803 + }, + { + "epoch": 0.7192971205027897, + "grad_norm": 56.03858184814453, + "learning_rate": 5e-06, + "loss": 0.8927, + "num_input_tokens_seen": 354199704, + "step": 2804 + }, + { + "epoch": 0.7192971205027897, + "loss": 1.030813455581665, + "loss_ce": 0.002004914451390505, + "loss_iou": 0.466796875, + "loss_num": 0.018798828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 354199704, + "step": 2804 + }, + { + "epoch": 0.7195536458667351, + "grad_norm": 58.24660110473633, + "learning_rate": 5e-06, + "loss": 0.9771, + "num_input_tokens_seen": 354326296, + "step": 2805 + }, + { + "epoch": 0.7195536458667351, + "loss": 0.9549286365509033, + "loss_ce": 0.0008271246333606541, + "loss_iou": 0.44140625, + "loss_num": 0.01416015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 354326296, + "step": 2805 + }, + { + "epoch": 0.7198101712306805, + "grad_norm": 49.21638488769531, + "learning_rate": 5e-06, + "loss": 1.01, + "num_input_tokens_seen": 354452416, + "step": 2806 + }, + { + "epoch": 0.7198101712306805, + "loss": 1.0798671245574951, + "loss_ce": 0.0007655572262592614, + "loss_iou": 0.484375, + "loss_num": 0.0220947265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 354452416, + "step": 2806 + }, + { + "epoch": 0.7200666965946257, + "grad_norm": 56.945655822753906, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 354579052, + "step": 2807 + }, + { + "epoch": 0.7200666965946257, + "loss": 1.0302178859710693, + "loss_ce": 0.0009209836134687066, + "loss_iou": 0.4765625, + "loss_num": 0.01483154296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 354579052, + "step": 2807 + }, + { + "epoch": 0.7203232219585711, + "grad_norm": 64.86836242675781, + "learning_rate": 5e-06, + "loss": 0.8812, + "num_input_tokens_seen": 354706604, + "step": 2808 + }, + { + "epoch": 0.7203232219585711, + "loss": 0.8263351321220398, + "loss_ce": 0.0016280682757496834, + "loss_iou": 0.38671875, + "loss_num": 0.00994873046875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 354706604, + "step": 2808 + }, + { + "epoch": 0.7205797473225165, + "grad_norm": 51.22687530517578, + "learning_rate": 5e-06, + "loss": 0.8996, + "num_input_tokens_seen": 354832924, + "step": 2809 + }, + { + "epoch": 0.7205797473225165, + "loss": 0.9596093893051147, + "loss_ce": 0.00013671658234670758, + "loss_iou": 0.451171875, + "loss_num": 0.0113525390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 354832924, + "step": 2809 + }, + { + "epoch": 0.7208362726864619, + "grad_norm": 34.734073638916016, + "learning_rate": 5e-06, + "loss": 0.9301, + "num_input_tokens_seen": 354959556, + "step": 2810 + }, + { + "epoch": 0.7208362726864619, + "loss": 0.8900448083877563, + "loss_ce": 0.0003963771741837263, + "loss_iou": 0.408203125, + "loss_num": 0.01470947265625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 354959556, + "step": 2810 + }, + { + "epoch": 0.7210927980504073, + "grad_norm": 41.50166320800781, + "learning_rate": 5e-06, + "loss": 0.9234, + "num_input_tokens_seen": 355085776, + "step": 2811 + }, + { + "epoch": 0.7210927980504073, + "loss": 0.9190911650657654, + "loss_ce": 0.0016106865368783474, + "loss_iou": 0.4296875, + "loss_num": 0.01190185546875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 355085776, + "step": 2811 + }, + { + "epoch": 0.7213493234143525, + "grad_norm": 55.22750473022461, + "learning_rate": 5e-06, + "loss": 0.9103, + "num_input_tokens_seen": 355213272, + "step": 2812 + }, + { + "epoch": 0.7213493234143525, + "loss": 1.0459450483322144, + "loss_ce": 0.0019998103380203247, + "loss_iou": 0.46484375, + "loss_num": 0.0224609375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 355213272, + "step": 2812 + }, + { + "epoch": 0.7216058487782979, + "grad_norm": 47.21759033203125, + "learning_rate": 5e-06, + "loss": 0.9078, + "num_input_tokens_seen": 355339552, + "step": 2813 + }, + { + "epoch": 0.7216058487782979, + "loss": 0.8720395565032959, + "loss_ce": 0.0024106630589812994, + "loss_iou": 0.416015625, + "loss_num": 0.00775146484375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 355339552, + "step": 2813 + }, + { + "epoch": 0.7218623741422433, + "grad_norm": 52.06370162963867, + "learning_rate": 5e-06, + "loss": 0.8907, + "num_input_tokens_seen": 355467080, + "step": 2814 + }, + { + "epoch": 0.7218623741422433, + "loss": 1.166247844696045, + "loss_ce": 0.0021853891666978598, + "loss_iou": 0.52734375, + "loss_num": 0.022216796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 355467080, + "step": 2814 + }, + { + "epoch": 0.7221188995061887, + "grad_norm": 43.77872085571289, + "learning_rate": 5e-06, + "loss": 1.1128, + "num_input_tokens_seen": 355593732, + "step": 2815 + }, + { + "epoch": 0.7221188995061887, + "loss": 1.0303559303283691, + "loss_ce": 0.0005707253003492951, + "loss_iou": 0.474609375, + "loss_num": 0.0159912109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 355593732, + "step": 2815 + }, + { + "epoch": 0.7223754248701341, + "grad_norm": 43.35484313964844, + "learning_rate": 5e-06, + "loss": 0.9373, + "num_input_tokens_seen": 355720060, + "step": 2816 + }, + { + "epoch": 0.7223754248701341, + "loss": 0.9670222997665405, + "loss_ce": 0.0021785483695566654, + "loss_iou": 0.4375, + "loss_num": 0.0181884765625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 355720060, + "step": 2816 + }, + { + "epoch": 0.7226319502340794, + "grad_norm": 46.44950485229492, + "learning_rate": 5e-06, + "loss": 0.9124, + "num_input_tokens_seen": 355848268, + "step": 2817 + }, + { + "epoch": 0.7226319502340794, + "loss": 1.0489916801452637, + "loss_ce": 0.0006518606096506119, + "loss_iou": 0.48828125, + "loss_num": 0.0140380859375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 355848268, + "step": 2817 + }, + { + "epoch": 0.7228884755980247, + "grad_norm": 44.861228942871094, + "learning_rate": 5e-06, + "loss": 0.8598, + "num_input_tokens_seen": 355975288, + "step": 2818 + }, + { + "epoch": 0.7228884755980247, + "loss": 0.9132239818572998, + "loss_ce": 0.00038213114021345973, + "loss_iou": 0.416015625, + "loss_num": 0.0164794921875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 355975288, + "step": 2818 + }, + { + "epoch": 0.7231450009619701, + "grad_norm": 64.87090301513672, + "learning_rate": 5e-06, + "loss": 0.9213, + "num_input_tokens_seen": 356101784, + "step": 2819 + }, + { + "epoch": 0.7231450009619701, + "loss": 0.7225340604782104, + "loss_ce": 0.00012193157454021275, + "loss_iou": 0.345703125, + "loss_num": 0.005859375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 356101784, + "step": 2819 + }, + { + "epoch": 0.7234015263259155, + "grad_norm": 63.23878860473633, + "learning_rate": 5e-06, + "loss": 1.0303, + "num_input_tokens_seen": 356228496, + "step": 2820 + }, + { + "epoch": 0.7234015263259155, + "loss": 1.0843150615692139, + "loss_ce": 0.00033060350688174367, + "loss_iou": 0.5078125, + "loss_num": 0.01312255859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 356228496, + "step": 2820 + }, + { + "epoch": 0.7236580516898609, + "grad_norm": 52.69314193725586, + "learning_rate": 5e-06, + "loss": 1.0939, + "num_input_tokens_seen": 356354760, + "step": 2821 + }, + { + "epoch": 0.7236580516898609, + "loss": 1.1579294204711914, + "loss_ce": 0.0007028396939858794, + "loss_iou": 0.54296875, + "loss_num": 0.0146484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 356354760, + "step": 2821 + }, + { + "epoch": 0.7239145770538062, + "grad_norm": 40.191829681396484, + "learning_rate": 5e-06, + "loss": 1.0158, + "num_input_tokens_seen": 356480748, + "step": 2822 + }, + { + "epoch": 0.7239145770538062, + "loss": 1.1308033466339111, + "loss_ce": 0.0023854016326367855, + "loss_iou": 0.515625, + "loss_num": 0.019287109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 356480748, + "step": 2822 + }, + { + "epoch": 0.7241711024177515, + "grad_norm": 37.723724365234375, + "learning_rate": 5e-06, + "loss": 0.9702, + "num_input_tokens_seen": 356607024, + "step": 2823 + }, + { + "epoch": 0.7241711024177515, + "loss": 1.0781813859939575, + "loss_ce": 0.000544671667739749, + "loss_iou": 0.4921875, + "loss_num": 0.01904296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 356607024, + "step": 2823 + }, + { + "epoch": 0.7244276277816969, + "grad_norm": 57.555667877197266, + "learning_rate": 5e-06, + "loss": 0.9953, + "num_input_tokens_seen": 356733324, + "step": 2824 + }, + { + "epoch": 0.7244276277816969, + "loss": 1.0243428945541382, + "loss_ce": 0.0026143412105739117, + "loss_iou": 0.47265625, + "loss_num": 0.0155029296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 356733324, + "step": 2824 + }, + { + "epoch": 0.7246841531456423, + "grad_norm": 61.309967041015625, + "learning_rate": 5e-06, + "loss": 1.0171, + "num_input_tokens_seen": 356859072, + "step": 2825 + }, + { + "epoch": 0.7246841531456423, + "loss": 1.1009202003479004, + "loss_ce": 0.0008224837947636843, + "loss_iou": 0.5078125, + "loss_num": 0.01708984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 356859072, + "step": 2825 + }, + { + "epoch": 0.7249406785095877, + "grad_norm": 60.1578254699707, + "learning_rate": 5e-06, + "loss": 0.9949, + "num_input_tokens_seen": 356984172, + "step": 2826 + }, + { + "epoch": 0.7249406785095877, + "loss": 0.9942096471786499, + "loss_ce": 0.0005573141388595104, + "loss_iou": 0.46484375, + "loss_num": 0.0126953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 356984172, + "step": 2826 + }, + { + "epoch": 0.725197203873533, + "grad_norm": 48.544612884521484, + "learning_rate": 5e-06, + "loss": 1.0448, + "num_input_tokens_seen": 357111116, + "step": 2827 + }, + { + "epoch": 0.725197203873533, + "loss": 0.939170777797699, + "loss_ce": 0.00020591789507307112, + "loss_iou": 0.44140625, + "loss_num": 0.0107421875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 357111116, + "step": 2827 + }, + { + "epoch": 0.7254537292374783, + "grad_norm": 39.68471908569336, + "learning_rate": 5e-06, + "loss": 0.8714, + "num_input_tokens_seen": 357237900, + "step": 2828 + }, + { + "epoch": 0.7254537292374783, + "loss": 0.9213587045669556, + "loss_ce": 0.0007044594385661185, + "loss_iou": 0.43359375, + "loss_num": 0.01031494140625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 357237900, + "step": 2828 + }, + { + "epoch": 0.7257102546014237, + "grad_norm": 44.247867584228516, + "learning_rate": 5e-06, + "loss": 0.9762, + "num_input_tokens_seen": 357364320, + "step": 2829 + }, + { + "epoch": 0.7257102546014237, + "loss": 0.7812053561210632, + "loss_ce": 0.0009319039527326822, + "loss_iou": 0.376953125, + "loss_num": 0.00531005859375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 357364320, + "step": 2829 + }, + { + "epoch": 0.7259667799653691, + "grad_norm": 43.59778594970703, + "learning_rate": 5e-06, + "loss": 0.9531, + "num_input_tokens_seen": 357490320, + "step": 2830 + }, + { + "epoch": 0.7259667799653691, + "loss": 0.8646785616874695, + "loss_ce": 0.00042075279634445906, + "loss_iou": 0.40625, + "loss_num": 0.01043701171875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 357490320, + "step": 2830 + }, + { + "epoch": 0.7262233053293145, + "grad_norm": 46.14194107055664, + "learning_rate": 5e-06, + "loss": 0.9781, + "num_input_tokens_seen": 357616924, + "step": 2831 + }, + { + "epoch": 0.7262233053293145, + "loss": 1.021560788154602, + "loss_ce": 0.00300608086399734, + "loss_iou": 0.46484375, + "loss_num": 0.0174560546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 357616924, + "step": 2831 + }, + { + "epoch": 0.7264798306932598, + "grad_norm": 53.14125442504883, + "learning_rate": 5e-06, + "loss": 1.0364, + "num_input_tokens_seen": 357743600, + "step": 2832 + }, + { + "epoch": 0.7264798306932598, + "loss": 0.9118888974189758, + "loss_ce": 0.0012443918967619538, + "loss_iou": 0.404296875, + "loss_num": 0.0205078125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 357743600, + "step": 2832 + }, + { + "epoch": 0.7267363560572051, + "grad_norm": 64.60108184814453, + "learning_rate": 5e-06, + "loss": 1.0126, + "num_input_tokens_seen": 357870412, + "step": 2833 + }, + { + "epoch": 0.7267363560572051, + "loss": 1.270204782485962, + "loss_ce": 0.0016501240897923708, + "loss_iou": 0.5859375, + "loss_num": 0.0201416015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 357870412, + "step": 2833 + }, + { + "epoch": 0.7269928814211505, + "grad_norm": 41.20368576049805, + "learning_rate": 5e-06, + "loss": 1.0274, + "num_input_tokens_seen": 357995752, + "step": 2834 + }, + { + "epoch": 0.7269928814211505, + "loss": 0.9151555299758911, + "loss_ce": 0.00255783274769783, + "loss_iou": 0.419921875, + "loss_num": 0.01446533203125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 357995752, + "step": 2834 + }, + { + "epoch": 0.7272494067850959, + "grad_norm": 35.52908706665039, + "learning_rate": 5e-06, + "loss": 1.016, + "num_input_tokens_seen": 358122240, + "step": 2835 + }, + { + "epoch": 0.7272494067850959, + "loss": 0.9484637975692749, + "loss_ce": 0.0007098839851096272, + "loss_iou": 0.431640625, + "loss_num": 0.016845703125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 358122240, + "step": 2835 + }, + { + "epoch": 0.7275059321490412, + "grad_norm": 64.89720916748047, + "learning_rate": 5e-06, + "loss": 0.9772, + "num_input_tokens_seen": 358249252, + "step": 2836 + }, + { + "epoch": 0.7275059321490412, + "loss": 0.906116247177124, + "loss_ce": 0.00133102061226964, + "loss_iou": 0.43359375, + "loss_num": 0.00750732421875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 358249252, + "step": 2836 + }, + { + "epoch": 0.7277624575129866, + "grad_norm": 62.25846481323242, + "learning_rate": 5e-06, + "loss": 0.9688, + "num_input_tokens_seen": 358374932, + "step": 2837 + }, + { + "epoch": 0.7277624575129866, + "loss": 1.0456215143203735, + "loss_ce": 0.0006996238371357322, + "loss_iou": 0.48046875, + "loss_num": 0.0172119140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 358374932, + "step": 2837 + }, + { + "epoch": 0.728018982876932, + "grad_norm": 17.83930015563965, + "learning_rate": 5e-06, + "loss": 0.9387, + "num_input_tokens_seen": 358501308, + "step": 2838 + }, + { + "epoch": 0.728018982876932, + "loss": 0.886888861656189, + "loss_ce": 0.0016349740326404572, + "loss_iou": 0.419921875, + "loss_num": 0.00927734375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 358501308, + "step": 2838 + }, + { + "epoch": 0.7282755082408773, + "grad_norm": 27.911901473999023, + "learning_rate": 5e-06, + "loss": 0.8681, + "num_input_tokens_seen": 358627888, + "step": 2839 + }, + { + "epoch": 0.7282755082408773, + "loss": 0.6563936471939087, + "loss_ce": 0.0011202108580619097, + "loss_iou": 0.30859375, + "loss_num": 0.007568359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 358627888, + "step": 2839 + }, + { + "epoch": 0.7285320336048227, + "grad_norm": 24.563753128051758, + "learning_rate": 5e-06, + "loss": 0.9155, + "num_input_tokens_seen": 358754056, + "step": 2840 + }, + { + "epoch": 0.7285320336048227, + "loss": 0.8562033176422119, + "loss_ce": 0.000978671247139573, + "loss_iou": 0.40234375, + "loss_num": 0.010009765625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 358754056, + "step": 2840 + }, + { + "epoch": 0.728788558968768, + "grad_norm": 40.74066925048828, + "learning_rate": 5e-06, + "loss": 1.0861, + "num_input_tokens_seen": 358880416, + "step": 2841 + }, + { + "epoch": 0.728788558968768, + "loss": 1.2149285078048706, + "loss_ce": 0.0005729615804739296, + "loss_iou": 0.546875, + "loss_num": 0.023681640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 358880416, + "step": 2841 + }, + { + "epoch": 0.7290450843327134, + "grad_norm": 108.10514068603516, + "learning_rate": 5e-06, + "loss": 1.0185, + "num_input_tokens_seen": 359006996, + "step": 2842 + }, + { + "epoch": 0.7290450843327134, + "loss": 0.9570274949073792, + "loss_ce": 0.0014611243968829513, + "loss_iou": 0.44140625, + "loss_num": 0.01495361328125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 359006996, + "step": 2842 + }, + { + "epoch": 0.7293016096966588, + "grad_norm": 45.7574348449707, + "learning_rate": 5e-06, + "loss": 1.0235, + "num_input_tokens_seen": 359133464, + "step": 2843 + }, + { + "epoch": 0.7293016096966588, + "loss": 0.99357008934021, + "loss_ce": 0.0004060387727804482, + "loss_iou": 0.4609375, + "loss_num": 0.0142822265625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 359133464, + "step": 2843 + }, + { + "epoch": 0.7295581350606041, + "grad_norm": 14.68533706665039, + "learning_rate": 5e-06, + "loss": 0.932, + "num_input_tokens_seen": 359259272, + "step": 2844 + }, + { + "epoch": 0.7295581350606041, + "loss": 0.9392783641815186, + "loss_ce": 0.0008017909131012857, + "loss_iou": 0.431640625, + "loss_num": 0.01513671875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 359259272, + "step": 2844 + }, + { + "epoch": 0.7298146604245495, + "grad_norm": 36.88554382324219, + "learning_rate": 5e-06, + "loss": 1.0173, + "num_input_tokens_seen": 359386020, + "step": 2845 + }, + { + "epoch": 0.7298146604245495, + "loss": 1.1052169799804688, + "loss_ce": 0.0026779568288475275, + "loss_iou": 0.5, + "loss_num": 0.019775390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 359386020, + "step": 2845 + }, + { + "epoch": 0.7300711857884948, + "grad_norm": 54.32188034057617, + "learning_rate": 5e-06, + "loss": 0.94, + "num_input_tokens_seen": 359512276, + "step": 2846 + }, + { + "epoch": 0.7300711857884948, + "loss": 0.9647353887557983, + "loss_ce": 0.004041970707476139, + "loss_iou": 0.42578125, + "loss_num": 0.0218505859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 359512276, + "step": 2846 + }, + { + "epoch": 0.7303277111524402, + "grad_norm": 90.82463073730469, + "learning_rate": 5e-06, + "loss": 0.927, + "num_input_tokens_seen": 359640012, + "step": 2847 + }, + { + "epoch": 0.7303277111524402, + "loss": 0.9547678828239441, + "loss_ce": 0.0016428836388513446, + "loss_iou": 0.44140625, + "loss_num": 0.01373291015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 359640012, + "step": 2847 + }, + { + "epoch": 0.7305842365163856, + "grad_norm": 43.46992111206055, + "learning_rate": 5e-06, + "loss": 1.1389, + "num_input_tokens_seen": 359766556, + "step": 2848 + }, + { + "epoch": 0.7305842365163856, + "loss": 1.2656770944595337, + "loss_ce": 0.002005229238420725, + "loss_iou": 0.58203125, + "loss_num": 0.020263671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 359766556, + "step": 2848 + }, + { + "epoch": 0.7308407618803309, + "grad_norm": 41.525638580322266, + "learning_rate": 5e-06, + "loss": 0.8855, + "num_input_tokens_seen": 359893380, + "step": 2849 + }, + { + "epoch": 0.7308407618803309, + "loss": 0.9961868524551392, + "loss_ce": 0.003022819757461548, + "loss_iou": 0.462890625, + "loss_num": 0.01373291015625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 359893380, + "step": 2849 + }, + { + "epoch": 0.7310972872442763, + "grad_norm": 49.34945297241211, + "learning_rate": 5e-06, + "loss": 0.9036, + "num_input_tokens_seen": 360020344, + "step": 2850 + }, + { + "epoch": 0.7310972872442763, + "loss": 0.885448694229126, + "loss_ce": 0.0016596624627709389, + "loss_iou": 0.4140625, + "loss_num": 0.01123046875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 360020344, + "step": 2850 + }, + { + "epoch": 0.7313538126082216, + "grad_norm": 47.24940490722656, + "learning_rate": 5e-06, + "loss": 1.0692, + "num_input_tokens_seen": 360146952, + "step": 2851 + }, + { + "epoch": 0.7313538126082216, + "loss": 1.1705174446105957, + "loss_ce": 0.0035252785310149193, + "loss_iou": 0.5234375, + "loss_num": 0.02490234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 360146952, + "step": 2851 + }, + { + "epoch": 0.731610337972167, + "grad_norm": 59.68442153930664, + "learning_rate": 5e-06, + "loss": 0.8492, + "num_input_tokens_seen": 360273492, + "step": 2852 + }, + { + "epoch": 0.731610337972167, + "loss": 0.7995551824569702, + "loss_ce": 0.000238730528508313, + "loss_iou": 0.373046875, + "loss_num": 0.0107421875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 360273492, + "step": 2852 + }, + { + "epoch": 0.7318668633361124, + "grad_norm": 52.57169723510742, + "learning_rate": 5e-06, + "loss": 1.1595, + "num_input_tokens_seen": 360399060, + "step": 2853 + }, + { + "epoch": 0.7318668633361124, + "loss": 1.048202633857727, + "loss_ce": 0.0013275989331305027, + "loss_iou": 0.474609375, + "loss_num": 0.0198974609375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 360399060, + "step": 2853 + }, + { + "epoch": 0.7321233887000577, + "grad_norm": 40.46049880981445, + "learning_rate": 5e-06, + "loss": 0.9902, + "num_input_tokens_seen": 360525232, + "step": 2854 + }, + { + "epoch": 0.7321233887000577, + "loss": 1.0628166198730469, + "loss_ce": 0.0017813870217651129, + "loss_iou": 0.46484375, + "loss_num": 0.0260009765625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 360525232, + "step": 2854 + }, + { + "epoch": 0.732379914064003, + "grad_norm": 51.18095779418945, + "learning_rate": 5e-06, + "loss": 0.9542, + "num_input_tokens_seen": 360651240, + "step": 2855 + }, + { + "epoch": 0.732379914064003, + "loss": 1.0427913665771484, + "loss_ce": 0.0012874825624749064, + "loss_iou": 0.4921875, + "loss_num": 0.01177978515625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 360651240, + "step": 2855 + }, + { + "epoch": 0.7326364394279484, + "grad_norm": 61.562355041503906, + "learning_rate": 5e-06, + "loss": 0.9819, + "num_input_tokens_seen": 360778452, + "step": 2856 + }, + { + "epoch": 0.7326364394279484, + "loss": 1.0774400234222412, + "loss_ce": 0.0002915410732384771, + "loss_iou": 0.5, + "loss_num": 0.01483154296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 360778452, + "step": 2856 + }, + { + "epoch": 0.7328929647918938, + "grad_norm": 64.8097915649414, + "learning_rate": 5e-06, + "loss": 1.0325, + "num_input_tokens_seen": 360904584, + "step": 2857 + }, + { + "epoch": 0.7328929647918938, + "loss": 0.9444053173065186, + "loss_ce": 0.0005576678668148816, + "loss_iou": 0.435546875, + "loss_num": 0.0146484375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 360904584, + "step": 2857 + }, + { + "epoch": 0.7331494901558392, + "grad_norm": 41.98838424682617, + "learning_rate": 5e-06, + "loss": 0.884, + "num_input_tokens_seen": 361030988, + "step": 2858 + }, + { + "epoch": 0.7331494901558392, + "loss": 0.879589319229126, + "loss_ce": 0.004101065918803215, + "loss_iou": 0.404296875, + "loss_num": 0.0133056640625, + "loss_xval": 0.875, + "num_input_tokens_seen": 361030988, + "step": 2858 + }, + { + "epoch": 0.7334060155197846, + "grad_norm": 44.346282958984375, + "learning_rate": 5e-06, + "loss": 0.9894, + "num_input_tokens_seen": 361157428, + "step": 2859 + }, + { + "epoch": 0.7334060155197846, + "loss": 1.1784920692443848, + "loss_ce": 0.001245994004420936, + "loss_iou": 0.53125, + "loss_num": 0.022705078125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 361157428, + "step": 2859 + }, + { + "epoch": 0.7336625408837298, + "grad_norm": 33.10796356201172, + "learning_rate": 5e-06, + "loss": 0.8992, + "num_input_tokens_seen": 361284312, + "step": 2860 + }, + { + "epoch": 0.7336625408837298, + "loss": 0.889793336391449, + "loss_ce": 0.0016097062034532428, + "loss_iou": 0.41796875, + "loss_num": 0.01031494140625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 361284312, + "step": 2860 + }, + { + "epoch": 0.7339190662476752, + "grad_norm": 32.144615173339844, + "learning_rate": 5e-06, + "loss": 0.9439, + "num_input_tokens_seen": 361410848, + "step": 2861 + }, + { + "epoch": 0.7339190662476752, + "loss": 0.958846926689148, + "loss_ce": 0.0008390661096200347, + "loss_iou": 0.431640625, + "loss_num": 0.019287109375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 361410848, + "step": 2861 + }, + { + "epoch": 0.7341755916116206, + "grad_norm": 33.133426666259766, + "learning_rate": 5e-06, + "loss": 0.8837, + "num_input_tokens_seen": 361536244, + "step": 2862 + }, + { + "epoch": 0.7341755916116206, + "loss": 0.8983770608901978, + "loss_ce": 0.00018374276987742633, + "loss_iou": 0.4140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 361536244, + "step": 2862 + }, + { + "epoch": 0.734432116975566, + "grad_norm": 34.70969009399414, + "learning_rate": 5e-06, + "loss": 1.0178, + "num_input_tokens_seen": 361663228, + "step": 2863 + }, + { + "epoch": 0.734432116975566, + "loss": 1.1460214853286743, + "loss_ce": 0.0024668944533914328, + "loss_iou": 0.5078125, + "loss_num": 0.02587890625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 361663228, + "step": 2863 + }, + { + "epoch": 0.7346886423395114, + "grad_norm": 42.94603729248047, + "learning_rate": 5e-06, + "loss": 0.7638, + "num_input_tokens_seen": 361789212, + "step": 2864 + }, + { + "epoch": 0.7346886423395114, + "loss": 0.7293186783790588, + "loss_ce": 0.0005589468637481332, + "loss_iou": 0.345703125, + "loss_num": 0.00738525390625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 361789212, + "step": 2864 + }, + { + "epoch": 0.7349451677034566, + "grad_norm": 59.597625732421875, + "learning_rate": 5e-06, + "loss": 0.9369, + "num_input_tokens_seen": 361915520, + "step": 2865 + }, + { + "epoch": 0.7349451677034566, + "loss": 0.8105785846710205, + "loss_ce": 0.0005200278828851879, + "loss_iou": 0.373046875, + "loss_num": 0.012939453125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 361915520, + "step": 2865 + }, + { + "epoch": 0.735201693067402, + "grad_norm": 52.19535446166992, + "learning_rate": 5e-06, + "loss": 0.8186, + "num_input_tokens_seen": 362040524, + "step": 2866 + }, + { + "epoch": 0.735201693067402, + "loss": 0.9298555850982666, + "loss_ce": 0.00016807878273539245, + "loss_iou": 0.4296875, + "loss_num": 0.01409912109375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 362040524, + "step": 2866 + }, + { + "epoch": 0.7354582184313474, + "grad_norm": 52.613990783691406, + "learning_rate": 5e-06, + "loss": 0.9489, + "num_input_tokens_seen": 362167360, + "step": 2867 + }, + { + "epoch": 0.7354582184313474, + "loss": 0.8561729192733765, + "loss_ce": 0.00021591296535916626, + "loss_iou": 0.41015625, + "loss_num": 0.0069580078125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 362167360, + "step": 2867 + }, + { + "epoch": 0.7357147437952928, + "grad_norm": 73.20659637451172, + "learning_rate": 5e-06, + "loss": 0.8976, + "num_input_tokens_seen": 362294548, + "step": 2868 + }, + { + "epoch": 0.7357147437952928, + "loss": 0.7930554747581482, + "loss_ce": 0.0010633030906319618, + "loss_iou": 0.376953125, + "loss_num": 0.00726318359375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 362294548, + "step": 2868 + }, + { + "epoch": 0.7359712691592382, + "grad_norm": 46.24943542480469, + "learning_rate": 5e-06, + "loss": 1.0575, + "num_input_tokens_seen": 362419708, + "step": 2869 + }, + { + "epoch": 0.7359712691592382, + "loss": 1.130737066268921, + "loss_ce": 0.006225332152098417, + "loss_iou": 0.515625, + "loss_num": 0.0185546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 362419708, + "step": 2869 + }, + { + "epoch": 0.7362277945231834, + "grad_norm": 34.62246322631836, + "learning_rate": 5e-06, + "loss": 1.0194, + "num_input_tokens_seen": 362546408, + "step": 2870 + }, + { + "epoch": 0.7362277945231834, + "loss": 0.9119246006011963, + "loss_ce": 0.0005476602236740291, + "loss_iou": 0.431640625, + "loss_num": 0.0093994140625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 362546408, + "step": 2870 + }, + { + "epoch": 0.7364843198871288, + "grad_norm": 49.51042556762695, + "learning_rate": 5e-06, + "loss": 0.7764, + "num_input_tokens_seen": 362673208, + "step": 2871 + }, + { + "epoch": 0.7364843198871288, + "loss": 0.6808434128761292, + "loss_ce": 0.00017936329822987318, + "loss_iou": 0.328125, + "loss_num": 0.005340576171875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 362673208, + "step": 2871 + }, + { + "epoch": 0.7367408452510742, + "grad_norm": 61.80891799926758, + "learning_rate": 5e-06, + "loss": 0.8785, + "num_input_tokens_seen": 362800444, + "step": 2872 + }, + { + "epoch": 0.7367408452510742, + "loss": 0.9224401712417603, + "loss_ce": 0.002518290188163519, + "loss_iou": 0.44140625, + "loss_num": 0.0076904296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 362800444, + "step": 2872 + }, + { + "epoch": 0.7369973706150196, + "grad_norm": 50.24666213989258, + "learning_rate": 5e-06, + "loss": 0.9393, + "num_input_tokens_seen": 362925696, + "step": 2873 + }, + { + "epoch": 0.7369973706150196, + "loss": 0.9510180354118347, + "loss_ce": 0.0008227573125623167, + "loss_iou": 0.451171875, + "loss_num": 0.00946044921875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 362925696, + "step": 2873 + }, + { + "epoch": 0.737253895978965, + "grad_norm": 42.02018737792969, + "learning_rate": 5e-06, + "loss": 0.9591, + "num_input_tokens_seen": 363052092, + "step": 2874 + }, + { + "epoch": 0.737253895978965, + "loss": 0.9521961212158203, + "loss_ce": 0.0022449966054409742, + "loss_iou": 0.439453125, + "loss_num": 0.01416015625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 363052092, + "step": 2874 + }, + { + "epoch": 0.7375104213429102, + "grad_norm": 54.57045364379883, + "learning_rate": 5e-06, + "loss": 0.9064, + "num_input_tokens_seen": 363178728, + "step": 2875 + }, + { + "epoch": 0.7375104213429102, + "loss": 0.8737182021141052, + "loss_ce": 0.0001830613473430276, + "loss_iou": 0.39453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.875, + "num_input_tokens_seen": 363178728, + "step": 2875 + }, + { + "epoch": 0.7377669467068556, + "grad_norm": 42.90182113647461, + "learning_rate": 5e-06, + "loss": 0.9854, + "num_input_tokens_seen": 363304812, + "step": 2876 + }, + { + "epoch": 0.7377669467068556, + "loss": 0.9922471642494202, + "loss_ce": 0.002012768527492881, + "loss_iou": 0.455078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 363304812, + "step": 2876 + }, + { + "epoch": 0.738023472070801, + "grad_norm": 34.664283752441406, + "learning_rate": 5e-06, + "loss": 1.0316, + "num_input_tokens_seen": 363431500, + "step": 2877 + }, + { + "epoch": 0.738023472070801, + "loss": 0.9916805028915405, + "loss_ce": 0.0019344130996614695, + "loss_iou": 0.46484375, + "loss_num": 0.01177978515625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 363431500, + "step": 2877 + }, + { + "epoch": 0.7382799974347464, + "grad_norm": 44.31930923461914, + "learning_rate": 5e-06, + "loss": 0.9473, + "num_input_tokens_seen": 363558568, + "step": 2878 + }, + { + "epoch": 0.7382799974347464, + "loss": 0.7525234222412109, + "loss_ce": 8.195130794774741e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0079345703125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 363558568, + "step": 2878 + }, + { + "epoch": 0.7385365227986918, + "grad_norm": 54.838623046875, + "learning_rate": 5e-06, + "loss": 0.9401, + "num_input_tokens_seen": 363685396, + "step": 2879 + }, + { + "epoch": 0.7385365227986918, + "loss": 0.7442762851715088, + "loss_ce": 0.0035536293871700764, + "loss_iou": 0.349609375, + "loss_num": 0.0081787109375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 363685396, + "step": 2879 + }, + { + "epoch": 0.7387930481626371, + "grad_norm": 57.230201721191406, + "learning_rate": 5e-06, + "loss": 1.0646, + "num_input_tokens_seen": 363812040, + "step": 2880 + }, + { + "epoch": 0.7387930481626371, + "loss": 1.0533959865570068, + "loss_ce": 0.00114986184053123, + "loss_iou": 0.4765625, + "loss_num": 0.02001953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 363812040, + "step": 2880 + }, + { + "epoch": 0.7390495735265824, + "grad_norm": 57.717166900634766, + "learning_rate": 5e-06, + "loss": 0.9923, + "num_input_tokens_seen": 363937460, + "step": 2881 + }, + { + "epoch": 0.7390495735265824, + "loss": 1.1745389699935913, + "loss_ce": 0.003152207238599658, + "loss_iou": 0.50390625, + "loss_num": 0.03271484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 363937460, + "step": 2881 + }, + { + "epoch": 0.7393060988905278, + "grad_norm": 49.700828552246094, + "learning_rate": 5e-06, + "loss": 0.9458, + "num_input_tokens_seen": 364064196, + "step": 2882 + }, + { + "epoch": 0.7393060988905278, + "loss": 0.9371277689933777, + "loss_ce": 0.0015808974858373404, + "loss_iou": 0.4375, + "loss_num": 0.0120849609375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 364064196, + "step": 2882 + }, + { + "epoch": 0.7395626242544732, + "grad_norm": 68.93170928955078, + "learning_rate": 5e-06, + "loss": 0.9155, + "num_input_tokens_seen": 364189904, + "step": 2883 + }, + { + "epoch": 0.7395626242544732, + "loss": 1.0154376029968262, + "loss_ce": 0.0017657028511166573, + "loss_iou": 0.48046875, + "loss_num": 0.01031494140625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 364189904, + "step": 2883 + }, + { + "epoch": 0.7398191496184185, + "grad_norm": 50.39097595214844, + "learning_rate": 5e-06, + "loss": 0.8981, + "num_input_tokens_seen": 364316084, + "step": 2884 + }, + { + "epoch": 0.7398191496184185, + "loss": 0.9500230550765991, + "loss_ce": 0.0003160022897645831, + "loss_iou": 0.44921875, + "loss_num": 0.0106201171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 364316084, + "step": 2884 + }, + { + "epoch": 0.7400756749823639, + "grad_norm": 45.3362922668457, + "learning_rate": 5e-06, + "loss": 1.1094, + "num_input_tokens_seen": 364443448, + "step": 2885 + }, + { + "epoch": 0.7400756749823639, + "loss": 1.43181312084198, + "loss_ce": 0.0065201204270124435, + "loss_iou": 0.61328125, + "loss_num": 0.039794921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 364443448, + "step": 2885 + }, + { + "epoch": 0.7403322003463092, + "grad_norm": 51.42271423339844, + "learning_rate": 5e-06, + "loss": 0.9866, + "num_input_tokens_seen": 364569788, + "step": 2886 + }, + { + "epoch": 0.7403322003463092, + "loss": 0.8768347501754761, + "loss_ce": 0.0015906791668385267, + "loss_iou": 0.416015625, + "loss_num": 0.00860595703125, + "loss_xval": 0.875, + "num_input_tokens_seen": 364569788, + "step": 2886 + }, + { + "epoch": 0.7405887257102546, + "grad_norm": 61.521060943603516, + "learning_rate": 5e-06, + "loss": 0.9792, + "num_input_tokens_seen": 364696276, + "step": 2887 + }, + { + "epoch": 0.7405887257102546, + "loss": 1.0240933895111084, + "loss_ce": 0.00016754731768742204, + "loss_iou": 0.482421875, + "loss_num": 0.01171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 364696276, + "step": 2887 + }, + { + "epoch": 0.7408452510742, + "grad_norm": 65.92505645751953, + "learning_rate": 5e-06, + "loss": 0.8861, + "num_input_tokens_seen": 364823728, + "step": 2888 + }, + { + "epoch": 0.7408452510742, + "loss": 0.9291695356369019, + "loss_ce": 0.0009469046490266919, + "loss_iou": 0.443359375, + "loss_num": 0.00830078125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 364823728, + "step": 2888 + }, + { + "epoch": 0.7411017764381453, + "grad_norm": 59.999473571777344, + "learning_rate": 5e-06, + "loss": 1.0229, + "num_input_tokens_seen": 364950096, + "step": 2889 + }, + { + "epoch": 0.7411017764381453, + "loss": 1.1038157939910889, + "loss_ce": 0.0012767431326210499, + "loss_iou": 0.50390625, + "loss_num": 0.019287109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 364950096, + "step": 2889 + }, + { + "epoch": 0.7413583018020907, + "grad_norm": 65.09830474853516, + "learning_rate": 5e-06, + "loss": 1.0048, + "num_input_tokens_seen": 365076568, + "step": 2890 + }, + { + "epoch": 0.7413583018020907, + "loss": 1.1403884887695312, + "loss_ce": 0.001716645434498787, + "loss_iou": 0.5078125, + "loss_num": 0.0242919921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 365076568, + "step": 2890 + }, + { + "epoch": 0.741614827166036, + "grad_norm": 40.4169807434082, + "learning_rate": 5e-06, + "loss": 0.8165, + "num_input_tokens_seen": 365202080, + "step": 2891 + }, + { + "epoch": 0.741614827166036, + "loss": 0.7703062295913696, + "loss_ce": 0.001263250014744699, + "loss_iou": 0.37109375, + "loss_num": 0.005096435546875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 365202080, + "step": 2891 + }, + { + "epoch": 0.7418713525299814, + "grad_norm": 53.15790939331055, + "learning_rate": 5e-06, + "loss": 0.9223, + "num_input_tokens_seen": 365327992, + "step": 2892 + }, + { + "epoch": 0.7418713525299814, + "loss": 0.9246824979782104, + "loss_ce": 0.0003660918155219406, + "loss_iou": 0.419921875, + "loss_num": 0.017333984375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 365327992, + "step": 2892 + }, + { + "epoch": 0.7421278778939268, + "grad_norm": 56.599388122558594, + "learning_rate": 5e-06, + "loss": 0.9457, + "num_input_tokens_seen": 365454616, + "step": 2893 + }, + { + "epoch": 0.7421278778939268, + "loss": 1.0354650020599365, + "loss_ce": 0.0017734863795340061, + "loss_iou": 0.470703125, + "loss_num": 0.0186767578125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 365454616, + "step": 2893 + }, + { + "epoch": 0.7423844032578721, + "grad_norm": 55.33816909790039, + "learning_rate": 5e-06, + "loss": 0.9248, + "num_input_tokens_seen": 365581820, + "step": 2894 + }, + { + "epoch": 0.7423844032578721, + "loss": 0.9322671890258789, + "loss_ce": 0.0008707055822014809, + "loss_iou": 0.4453125, + "loss_num": 0.00762939453125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 365581820, + "step": 2894 + }, + { + "epoch": 0.7426409286218175, + "grad_norm": 49.030006408691406, + "learning_rate": 5e-06, + "loss": 0.9617, + "num_input_tokens_seen": 365707744, + "step": 2895 + }, + { + "epoch": 0.7426409286218175, + "loss": 0.77397620677948, + "loss_ce": 0.0007828634697943926, + "loss_iou": 0.37109375, + "loss_num": 0.005828857421875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 365707744, + "step": 2895 + }, + { + "epoch": 0.7428974539857628, + "grad_norm": 42.99722671508789, + "learning_rate": 5e-06, + "loss": 0.9314, + "num_input_tokens_seen": 365834148, + "step": 2896 + }, + { + "epoch": 0.7428974539857628, + "loss": 0.842495322227478, + "loss_ce": 0.0016750018112361431, + "loss_iou": 0.388671875, + "loss_num": 0.01251220703125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 365834148, + "step": 2896 + }, + { + "epoch": 0.7431539793497082, + "grad_norm": 48.429298400878906, + "learning_rate": 5e-06, + "loss": 0.8579, + "num_input_tokens_seen": 365960436, + "step": 2897 + }, + { + "epoch": 0.7431539793497082, + "loss": 0.8478619456291199, + "loss_ce": 0.0006939702434465289, + "loss_iou": 0.396484375, + "loss_num": 0.0107421875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 365960436, + "step": 2897 + }, + { + "epoch": 0.7434105047136536, + "grad_norm": 42.93800354003906, + "learning_rate": 5e-06, + "loss": 0.8292, + "num_input_tokens_seen": 366085796, + "step": 2898 + }, + { + "epoch": 0.7434105047136536, + "loss": 0.7011233568191528, + "loss_ce": 0.00043974071741104126, + "loss_iou": 0.337890625, + "loss_num": 0.0050048828125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 366085796, + "step": 2898 + }, + { + "epoch": 0.7436670300775989, + "grad_norm": 56.67951583862305, + "learning_rate": 5e-06, + "loss": 1.0165, + "num_input_tokens_seen": 366211696, + "step": 2899 + }, + { + "epoch": 0.7436670300775989, + "loss": 1.1352980136871338, + "loss_ce": 0.0002882396802306175, + "loss_iou": 0.51171875, + "loss_num": 0.0223388671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 366211696, + "step": 2899 + }, + { + "epoch": 0.7439235554415443, + "grad_norm": 38.33698272705078, + "learning_rate": 5e-06, + "loss": 0.9848, + "num_input_tokens_seen": 366336772, + "step": 2900 + }, + { + "epoch": 0.7439235554415443, + "loss": 0.9187126159667969, + "loss_ce": 0.003673582337796688, + "loss_iou": 0.419921875, + "loss_num": 0.0152587890625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 366336772, + "step": 2900 + }, + { + "epoch": 0.7441800808054897, + "grad_norm": 24.449342727661133, + "learning_rate": 5e-06, + "loss": 0.9038, + "num_input_tokens_seen": 366462652, + "step": 2901 + }, + { + "epoch": 0.7441800808054897, + "loss": 0.7887614965438843, + "loss_ce": 0.00018728242139331996, + "loss_iou": 0.376953125, + "loss_num": 0.007171630859375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 366462652, + "step": 2901 + }, + { + "epoch": 0.744436606169435, + "grad_norm": 31.02239418029785, + "learning_rate": 5e-06, + "loss": 0.9236, + "num_input_tokens_seen": 366589144, + "step": 2902 + }, + { + "epoch": 0.744436606169435, + "loss": 1.0597388744354248, + "loss_ce": 0.0006568798562511802, + "loss_iou": 0.48046875, + "loss_num": 0.019775390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 366589144, + "step": 2902 + }, + { + "epoch": 0.7446931315333803, + "grad_norm": 63.14118194580078, + "learning_rate": 5e-06, + "loss": 0.8998, + "num_input_tokens_seen": 366715780, + "step": 2903 + }, + { + "epoch": 0.7446931315333803, + "loss": 1.0560648441314697, + "loss_ce": 0.00015660231292713434, + "loss_iou": 0.47265625, + "loss_num": 0.0218505859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 366715780, + "step": 2903 + }, + { + "epoch": 0.7449496568973257, + "grad_norm": 29.228683471679688, + "learning_rate": 5e-06, + "loss": 0.8631, + "num_input_tokens_seen": 366842312, + "step": 2904 + }, + { + "epoch": 0.7449496568973257, + "loss": 0.7216359972953796, + "loss_ce": 0.0009328331798315048, + "loss_iou": 0.3359375, + "loss_num": 0.0098876953125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 366842312, + "step": 2904 + }, + { + "epoch": 0.7452061822612711, + "grad_norm": 41.407981872558594, + "learning_rate": 5e-06, + "loss": 0.9613, + "num_input_tokens_seen": 366967960, + "step": 2905 + }, + { + "epoch": 0.7452061822612711, + "loss": 0.8849389553070068, + "loss_ce": 0.00017335366283077747, + "loss_iou": 0.419921875, + "loss_num": 0.00909423828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 366967960, + "step": 2905 + }, + { + "epoch": 0.7454627076252165, + "grad_norm": 44.30210494995117, + "learning_rate": 5e-06, + "loss": 0.9986, + "num_input_tokens_seen": 367093872, + "step": 2906 + }, + { + "epoch": 0.7454627076252165, + "loss": 1.0389024019241333, + "loss_ce": 0.0013047081883996725, + "loss_iou": 0.48828125, + "loss_num": 0.01239013671875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 367093872, + "step": 2906 + }, + { + "epoch": 0.7457192329891618, + "grad_norm": 52.07560348510742, + "learning_rate": 5e-06, + "loss": 0.8747, + "num_input_tokens_seen": 367221216, + "step": 2907 + }, + { + "epoch": 0.7457192329891618, + "loss": 0.7859081029891968, + "loss_ce": 0.0002636136778164655, + "loss_iou": 0.375, + "loss_num": 0.007476806640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 367221216, + "step": 2907 + }, + { + "epoch": 0.7459757583531071, + "grad_norm": 48.93008804321289, + "learning_rate": 5e-06, + "loss": 0.8446, + "num_input_tokens_seen": 367347660, + "step": 2908 + }, + { + "epoch": 0.7459757583531071, + "loss": 0.8892890214920044, + "loss_ce": 0.0006171064451336861, + "loss_iou": 0.404296875, + "loss_num": 0.01611328125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 367347660, + "step": 2908 + }, + { + "epoch": 0.7462322837170525, + "grad_norm": 37.19274139404297, + "learning_rate": 5e-06, + "loss": 1.0103, + "num_input_tokens_seen": 367473268, + "step": 2909 + }, + { + "epoch": 0.7462322837170525, + "loss": 1.0072401762008667, + "loss_ce": 0.0004042255459353328, + "loss_iou": 0.48046875, + "loss_num": 0.00933837890625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 367473268, + "step": 2909 + }, + { + "epoch": 0.7464888090809979, + "grad_norm": 46.148353576660156, + "learning_rate": 5e-06, + "loss": 0.9917, + "num_input_tokens_seen": 367599068, + "step": 2910 + }, + { + "epoch": 0.7464888090809979, + "loss": 0.8082821369171143, + "loss_ce": 0.00017662528262007982, + "loss_iou": 0.376953125, + "loss_num": 0.01092529296875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 367599068, + "step": 2910 + }, + { + "epoch": 0.7467453344449433, + "grad_norm": 38.28260803222656, + "learning_rate": 5e-06, + "loss": 0.9583, + "num_input_tokens_seen": 367725516, + "step": 2911 + }, + { + "epoch": 0.7467453344449433, + "loss": 1.2908265590667725, + "loss_ce": 0.0007875305018387735, + "loss_iou": 0.55859375, + "loss_num": 0.03466796875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 367725516, + "step": 2911 + }, + { + "epoch": 0.7470018598088886, + "grad_norm": 51.0726432800293, + "learning_rate": 5e-06, + "loss": 0.9228, + "num_input_tokens_seen": 367852492, + "step": 2912 + }, + { + "epoch": 0.7470018598088886, + "loss": 1.1356914043426514, + "loss_ce": 0.005320283118635416, + "loss_iou": 0.51953125, + "loss_num": 0.01904296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 367852492, + "step": 2912 + }, + { + "epoch": 0.7472583851728339, + "grad_norm": 45.088077545166016, + "learning_rate": 5e-06, + "loss": 0.9329, + "num_input_tokens_seen": 367978688, + "step": 2913 + }, + { + "epoch": 0.7472583851728339, + "loss": 0.8362195491790771, + "loss_ce": 0.0002820372174028307, + "loss_iou": 0.39453125, + "loss_num": 0.0093994140625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 367978688, + "step": 2913 + }, + { + "epoch": 0.7475149105367793, + "grad_norm": 40.52402114868164, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 368105260, + "step": 2914 + }, + { + "epoch": 0.7475149105367793, + "loss": 0.8095253705978394, + "loss_ce": 0.004837851971387863, + "loss_iou": 0.376953125, + "loss_num": 0.01019287109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 368105260, + "step": 2914 + }, + { + "epoch": 0.7477714359007247, + "grad_norm": 46.94044494628906, + "learning_rate": 5e-06, + "loss": 1.0062, + "num_input_tokens_seen": 368231940, + "step": 2915 + }, + { + "epoch": 0.7477714359007247, + "loss": 0.9161401987075806, + "loss_ce": 0.002565952017903328, + "loss_iou": 0.4296875, + "loss_num": 0.01080322265625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 368231940, + "step": 2915 + }, + { + "epoch": 0.7480279612646701, + "grad_norm": 53.26560974121094, + "learning_rate": 5e-06, + "loss": 0.9311, + "num_input_tokens_seen": 368356912, + "step": 2916 + }, + { + "epoch": 0.7480279612646701, + "loss": 0.8973902463912964, + "loss_ce": 0.0016382763860747218, + "loss_iou": 0.4140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 368356912, + "step": 2916 + }, + { + "epoch": 0.7482844866286154, + "grad_norm": 52.47718811035156, + "learning_rate": 5e-06, + "loss": 1.0244, + "num_input_tokens_seen": 368483984, + "step": 2917 + }, + { + "epoch": 0.7482844866286154, + "loss": 1.0073564052581787, + "loss_ce": 0.002473614178597927, + "loss_iou": 0.45703125, + "loss_num": 0.017822265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 368483984, + "step": 2917 + }, + { + "epoch": 0.7485410119925607, + "grad_norm": 66.00101470947266, + "learning_rate": 5e-06, + "loss": 0.9624, + "num_input_tokens_seen": 368610968, + "step": 2918 + }, + { + "epoch": 0.7485410119925607, + "loss": 1.1099236011505127, + "loss_ce": 0.001525128143839538, + "loss_iou": 0.4921875, + "loss_num": 0.024658203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 368610968, + "step": 2918 + }, + { + "epoch": 0.7487975373565061, + "grad_norm": 50.08500671386719, + "learning_rate": 5e-06, + "loss": 1.0591, + "num_input_tokens_seen": 368736752, + "step": 2919 + }, + { + "epoch": 0.7487975373565061, + "loss": 1.0772939920425415, + "loss_ce": 0.00038969298475421965, + "loss_iou": 0.5, + "loss_num": 0.0159912109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 368736752, + "step": 2919 + }, + { + "epoch": 0.7490540627204515, + "grad_norm": 127.04724884033203, + "learning_rate": 5e-06, + "loss": 0.9248, + "num_input_tokens_seen": 368862936, + "step": 2920 + }, + { + "epoch": 0.7490540627204515, + "loss": 0.8267951011657715, + "loss_ce": 0.001111525227315724, + "loss_iou": 0.38671875, + "loss_num": 0.01031494140625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 368862936, + "step": 2920 + }, + { + "epoch": 0.7493105880843969, + "grad_norm": 38.62042236328125, + "learning_rate": 5e-06, + "loss": 1.0347, + "num_input_tokens_seen": 368989152, + "step": 2921 + }, + { + "epoch": 0.7493105880843969, + "loss": 1.0581347942352295, + "loss_ce": 0.001982544083148241, + "loss_iou": 0.490234375, + "loss_num": 0.01531982421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 368989152, + "step": 2921 + }, + { + "epoch": 0.7495671134483423, + "grad_norm": 52.66433334350586, + "learning_rate": 5e-06, + "loss": 0.8221, + "num_input_tokens_seen": 369115820, + "step": 2922 + }, + { + "epoch": 0.7495671134483423, + "loss": 0.8487037420272827, + "loss_ce": 0.0010474587325006723, + "loss_iou": 0.390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 369115820, + "step": 2922 + }, + { + "epoch": 0.7498236388122875, + "grad_norm": 58.686187744140625, + "learning_rate": 5e-06, + "loss": 0.9336, + "num_input_tokens_seen": 369242432, + "step": 2923 + }, + { + "epoch": 0.7498236388122875, + "loss": 0.8837091326713562, + "loss_ce": 0.0038263278547674417, + "loss_iou": 0.4140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 369242432, + "step": 2923 + }, + { + "epoch": 0.7500801641762329, + "grad_norm": 75.75703430175781, + "learning_rate": 5e-06, + "loss": 0.9585, + "num_input_tokens_seen": 369368752, + "step": 2924 + }, + { + "epoch": 0.7500801641762329, + "loss": 0.9671162366867065, + "loss_ce": 0.0003194212622474879, + "loss_iou": 0.46484375, + "loss_num": 0.00775146484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 369368752, + "step": 2924 + }, + { + "epoch": 0.7503366895401783, + "grad_norm": 57.64829635620117, + "learning_rate": 5e-06, + "loss": 1.0495, + "num_input_tokens_seen": 369496068, + "step": 2925 + }, + { + "epoch": 0.7503366895401783, + "loss": 0.916729748249054, + "loss_ce": 0.00022585361148230731, + "loss_iou": 0.4375, + "loss_num": 0.00836181640625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 369496068, + "step": 2925 + }, + { + "epoch": 0.7505932149041237, + "grad_norm": 23.91770362854004, + "learning_rate": 5e-06, + "loss": 0.9271, + "num_input_tokens_seen": 369622380, + "step": 2926 + }, + { + "epoch": 0.7505932149041237, + "loss": 0.8240903615951538, + "loss_ce": 0.0013364898040890694, + "loss_iou": 0.38671875, + "loss_num": 0.00958251953125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 369622380, + "step": 2926 + }, + { + "epoch": 0.750849740268069, + "grad_norm": 24.5523624420166, + "learning_rate": 5e-06, + "loss": 0.9012, + "num_input_tokens_seen": 369748976, + "step": 2927 + }, + { + "epoch": 0.750849740268069, + "loss": 0.8869302272796631, + "loss_ce": 0.001676369458436966, + "loss_iou": 0.40625, + "loss_num": 0.01422119140625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 369748976, + "step": 2927 + }, + { + "epoch": 0.7511062656320143, + "grad_norm": 59.7867317199707, + "learning_rate": 5e-06, + "loss": 0.8306, + "num_input_tokens_seen": 369874952, + "step": 2928 + }, + { + "epoch": 0.7511062656320143, + "loss": 0.9152774810791016, + "loss_ce": 0.001703269430436194, + "loss_iou": 0.431640625, + "loss_num": 0.010009765625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 369874952, + "step": 2928 + }, + { + "epoch": 0.7513627909959597, + "grad_norm": 32.444087982177734, + "learning_rate": 5e-06, + "loss": 0.9675, + "num_input_tokens_seen": 370000728, + "step": 2929 + }, + { + "epoch": 0.7513627909959597, + "loss": 0.759773850440979, + "loss_ce": 0.001473087351769209, + "loss_iou": 0.353515625, + "loss_num": 0.01025390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 370000728, + "step": 2929 + }, + { + "epoch": 0.7516193163599051, + "grad_norm": 37.633174896240234, + "learning_rate": 5e-06, + "loss": 0.9555, + "num_input_tokens_seen": 370126488, + "step": 2930 + }, + { + "epoch": 0.7516193163599051, + "loss": 0.8306431770324707, + "loss_ce": 0.001541624660603702, + "loss_iou": 0.392578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 370126488, + "step": 2930 + }, + { + "epoch": 0.7518758417238505, + "grad_norm": 34.20035171508789, + "learning_rate": 5e-06, + "loss": 1.0137, + "num_input_tokens_seen": 370250408, + "step": 2931 + }, + { + "epoch": 0.7518758417238505, + "loss": 1.0433313846588135, + "loss_ce": 0.002315775491297245, + "loss_iou": 0.4609375, + "loss_num": 0.0238037109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 370250408, + "step": 2931 + }, + { + "epoch": 0.7521323670877959, + "grad_norm": 31.269594192504883, + "learning_rate": 5e-06, + "loss": 0.9913, + "num_input_tokens_seen": 370377932, + "step": 2932 + }, + { + "epoch": 0.7521323670877959, + "loss": 1.1491944789886475, + "loss_ce": 0.0012452425435185432, + "loss_iou": 0.515625, + "loss_num": 0.024169921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 370377932, + "step": 2932 + }, + { + "epoch": 0.7523888924517411, + "grad_norm": 129.4665985107422, + "learning_rate": 5e-06, + "loss": 0.961, + "num_input_tokens_seen": 370504056, + "step": 2933 + }, + { + "epoch": 0.7523888924517411, + "loss": 1.029911994934082, + "loss_ce": 0.0015916588017717004, + "loss_iou": 0.455078125, + "loss_num": 0.0235595703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 370504056, + "step": 2933 + }, + { + "epoch": 0.7526454178156865, + "grad_norm": 67.43452453613281, + "learning_rate": 5e-06, + "loss": 0.9946, + "num_input_tokens_seen": 370630080, + "step": 2934 + }, + { + "epoch": 0.7526454178156865, + "loss": 0.9475643038749695, + "loss_ce": 0.0002986827748827636, + "loss_iou": 0.43359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 370630080, + "step": 2934 + }, + { + "epoch": 0.7529019431796319, + "grad_norm": 72.05486297607422, + "learning_rate": 5e-06, + "loss": 1.0709, + "num_input_tokens_seen": 370756984, + "step": 2935 + }, + { + "epoch": 0.7529019431796319, + "loss": 1.1980626583099365, + "loss_ce": 0.0007969894795678556, + "loss_iou": 0.54296875, + "loss_num": 0.0218505859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 370756984, + "step": 2935 + }, + { + "epoch": 0.7531584685435773, + "grad_norm": 30.09787940979004, + "learning_rate": 5e-06, + "loss": 1.055, + "num_input_tokens_seen": 370883880, + "step": 2936 + }, + { + "epoch": 0.7531584685435773, + "loss": 1.3535676002502441, + "loss_ce": 0.0024934483226388693, + "loss_iou": 0.59765625, + "loss_num": 0.031005859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 370883880, + "step": 2936 + }, + { + "epoch": 0.7534149939075226, + "grad_norm": 26.132169723510742, + "learning_rate": 5e-06, + "loss": 0.8269, + "num_input_tokens_seen": 371009960, + "step": 2937 + }, + { + "epoch": 0.7534149939075226, + "loss": 0.7266606092453003, + "loss_ce": 9.808260801946744e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 371009960, + "step": 2937 + }, + { + "epoch": 0.7536715192714679, + "grad_norm": 40.92720413208008, + "learning_rate": 5e-06, + "loss": 0.7756, + "num_input_tokens_seen": 371136112, + "step": 2938 + }, + { + "epoch": 0.7536715192714679, + "loss": 0.8086890578269958, + "loss_ce": 0.00033946745679713786, + "loss_iou": 0.37890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 371136112, + "step": 2938 + }, + { + "epoch": 0.7539280446354133, + "grad_norm": 50.131526947021484, + "learning_rate": 5e-06, + "loss": 0.9938, + "num_input_tokens_seen": 371262380, + "step": 2939 + }, + { + "epoch": 0.7539280446354133, + "loss": 0.7855103015899658, + "loss_ce": 0.00035402868525125086, + "loss_iou": 0.376953125, + "loss_num": 0.006103515625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 371262380, + "step": 2939 + }, + { + "epoch": 0.7541845699993587, + "grad_norm": 55.68073272705078, + "learning_rate": 5e-06, + "loss": 0.8413, + "num_input_tokens_seen": 371389068, + "step": 2940 + }, + { + "epoch": 0.7541845699993587, + "loss": 0.8037253022193909, + "loss_ce": 0.0005026362487114966, + "loss_iou": 0.376953125, + "loss_num": 0.01019287109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 371389068, + "step": 2940 + }, + { + "epoch": 0.7544410953633041, + "grad_norm": 50.3552360534668, + "learning_rate": 5e-06, + "loss": 0.9839, + "num_input_tokens_seen": 371516228, + "step": 2941 + }, + { + "epoch": 0.7544410953633041, + "loss": 0.839636504650116, + "loss_ce": 0.0002810178557410836, + "loss_iou": 0.3984375, + "loss_num": 0.00830078125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 371516228, + "step": 2941 + }, + { + "epoch": 0.7546976207272494, + "grad_norm": 55.25839614868164, + "learning_rate": 5e-06, + "loss": 0.9236, + "num_input_tokens_seen": 371643648, + "step": 2942 + }, + { + "epoch": 0.7546976207272494, + "loss": 1.0327321290969849, + "loss_ce": 0.001482107792980969, + "loss_iou": 0.482421875, + "loss_num": 0.01312255859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 371643648, + "step": 2942 + }, + { + "epoch": 0.7549541460911948, + "grad_norm": 51.121116638183594, + "learning_rate": 5e-06, + "loss": 1.1064, + "num_input_tokens_seen": 371770244, + "step": 2943 + }, + { + "epoch": 0.7549541460911948, + "loss": 1.0671833753585815, + "loss_ce": 0.0002888813032768667, + "loss_iou": 0.494140625, + "loss_num": 0.0159912109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 371770244, + "step": 2943 + }, + { + "epoch": 0.7552106714551401, + "grad_norm": 33.96750259399414, + "learning_rate": 5e-06, + "loss": 0.9157, + "num_input_tokens_seen": 371896280, + "step": 2944 + }, + { + "epoch": 0.7552106714551401, + "loss": 0.9088529348373413, + "loss_ce": 0.0006498864386230707, + "loss_iou": 0.4140625, + "loss_num": 0.015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 371896280, + "step": 2944 + }, + { + "epoch": 0.7554671968190855, + "grad_norm": 42.79534149169922, + "learning_rate": 5e-06, + "loss": 0.9689, + "num_input_tokens_seen": 372021608, + "step": 2945 + }, + { + "epoch": 0.7554671968190855, + "loss": 0.9629539847373962, + "loss_ce": 0.0015281589003279805, + "loss_iou": 0.4453125, + "loss_num": 0.01416015625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 372021608, + "step": 2945 + }, + { + "epoch": 0.7557237221830309, + "grad_norm": 48.907371520996094, + "learning_rate": 5e-06, + "loss": 0.847, + "num_input_tokens_seen": 372148148, + "step": 2946 + }, + { + "epoch": 0.7557237221830309, + "loss": 0.8158549070358276, + "loss_ce": 0.0026225056499242783, + "loss_iou": 0.3671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 372148148, + "step": 2946 + }, + { + "epoch": 0.7559802475469762, + "grad_norm": 47.80815505981445, + "learning_rate": 5e-06, + "loss": 1.1017, + "num_input_tokens_seen": 372274872, + "step": 2947 + }, + { + "epoch": 0.7559802475469762, + "loss": 1.1542061567306519, + "loss_ce": 0.00039753917371854186, + "loss_iou": 0.52734375, + "loss_num": 0.0194091796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 372274872, + "step": 2947 + }, + { + "epoch": 0.7562367729109216, + "grad_norm": 50.44989776611328, + "learning_rate": 5e-06, + "loss": 0.8847, + "num_input_tokens_seen": 372400848, + "step": 2948 + }, + { + "epoch": 0.7562367729109216, + "loss": 0.8683205842971802, + "loss_ce": 0.004551074467599392, + "loss_iou": 0.40625, + "loss_num": 0.0107421875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 372400848, + "step": 2948 + }, + { + "epoch": 0.7564932982748669, + "grad_norm": 52.11412048339844, + "learning_rate": 5e-06, + "loss": 0.9332, + "num_input_tokens_seen": 372527024, + "step": 2949 + }, + { + "epoch": 0.7564932982748669, + "loss": 0.9911819696426392, + "loss_ce": 0.001435853191651404, + "loss_iou": 0.4609375, + "loss_num": 0.013427734375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 372527024, + "step": 2949 + }, + { + "epoch": 0.7567498236388123, + "grad_norm": 45.02421951293945, + "learning_rate": 5e-06, + "loss": 0.9029, + "num_input_tokens_seen": 372651624, + "step": 2950 + }, + { + "epoch": 0.7567498236388123, + "loss": 0.7059605121612549, + "loss_ce": 0.00014994715456850827, + "loss_iou": 0.337890625, + "loss_num": 0.00628662109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 372651624, + "step": 2950 + }, + { + "epoch": 0.7570063490027577, + "grad_norm": 45.83402633666992, + "learning_rate": 5e-06, + "loss": 0.936, + "num_input_tokens_seen": 372777512, + "step": 2951 + }, + { + "epoch": 0.7570063490027577, + "loss": 1.0333895683288574, + "loss_ce": 0.0006747430888935924, + "loss_iou": 0.46484375, + "loss_num": 0.02099609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 372777512, + "step": 2951 + }, + { + "epoch": 0.757262874366703, + "grad_norm": 54.62164306640625, + "learning_rate": 5e-06, + "loss": 0.9646, + "num_input_tokens_seen": 372903696, + "step": 2952 + }, + { + "epoch": 0.757262874366703, + "loss": 1.2780678272247314, + "loss_ce": 0.0014564378652721643, + "loss_iou": 0.58203125, + "loss_num": 0.0218505859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 372903696, + "step": 2952 + }, + { + "epoch": 0.7575193997306484, + "grad_norm": 44.0029182434082, + "learning_rate": 5e-06, + "loss": 0.9768, + "num_input_tokens_seen": 373029860, + "step": 2953 + }, + { + "epoch": 0.7575193997306484, + "loss": 0.8335681557655334, + "loss_ce": 0.0042224302887916565, + "loss_iou": 0.39453125, + "loss_num": 0.0081787109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 373029860, + "step": 2953 + }, + { + "epoch": 0.7577759250945937, + "grad_norm": 35.36329650878906, + "learning_rate": 5e-06, + "loss": 0.9466, + "num_input_tokens_seen": 373156908, + "step": 2954 + }, + { + "epoch": 0.7577759250945937, + "loss": 0.9010488390922546, + "loss_ce": 0.00016992632299661636, + "loss_iou": 0.427734375, + "loss_num": 0.00933837890625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 373156908, + "step": 2954 + }, + { + "epoch": 0.7580324504585391, + "grad_norm": 65.16951751708984, + "learning_rate": 5e-06, + "loss": 0.9323, + "num_input_tokens_seen": 373282184, + "step": 2955 + }, + { + "epoch": 0.7580324504585391, + "loss": 0.7967361211776733, + "loss_ce": 0.0008376696496270597, + "loss_iou": 0.380859375, + "loss_num": 0.006927490234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 373282184, + "step": 2955 + }, + { + "epoch": 0.7582889758224844, + "grad_norm": 30.93491554260254, + "learning_rate": 5e-06, + "loss": 1.0973, + "num_input_tokens_seen": 373409256, + "step": 2956 + }, + { + "epoch": 0.7582889758224844, + "loss": 1.3271863460540771, + "loss_ce": 0.0015027127228677273, + "loss_iou": 0.57421875, + "loss_num": 0.034912109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 373409256, + "step": 2956 + }, + { + "epoch": 0.7585455011864298, + "grad_norm": 32.10368728637695, + "learning_rate": 5e-06, + "loss": 0.8917, + "num_input_tokens_seen": 373534256, + "step": 2957 + }, + { + "epoch": 0.7585455011864298, + "loss": 1.1536974906921387, + "loss_ce": 0.001353770261630416, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 373534256, + "step": 2957 + }, + { + "epoch": 0.7588020265503752, + "grad_norm": 28.152685165405273, + "learning_rate": 5e-06, + "loss": 0.8366, + "num_input_tokens_seen": 373660736, + "step": 2958 + }, + { + "epoch": 0.7588020265503752, + "loss": 0.7628601789474487, + "loss_ce": 0.0001648999168537557, + "loss_iou": 0.35546875, + "loss_num": 0.01031494140625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 373660736, + "step": 2958 + }, + { + "epoch": 0.7590585519143205, + "grad_norm": 36.67182159423828, + "learning_rate": 5e-06, + "loss": 1.0225, + "num_input_tokens_seen": 373786812, + "step": 2959 + }, + { + "epoch": 0.7590585519143205, + "loss": 0.8675322532653809, + "loss_ce": 0.0010772129753604531, + "loss_iou": 0.384765625, + "loss_num": 0.019287109375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 373786812, + "step": 2959 + }, + { + "epoch": 0.7593150772782659, + "grad_norm": 58.903228759765625, + "learning_rate": 5e-06, + "loss": 0.8594, + "num_input_tokens_seen": 373913828, + "step": 2960 + }, + { + "epoch": 0.7593150772782659, + "loss": 0.8267399072647095, + "loss_ce": 0.0030094454996287823, + "loss_iou": 0.392578125, + "loss_num": 0.0079345703125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 373913828, + "step": 2960 + }, + { + "epoch": 0.7595716026422112, + "grad_norm": 56.84625244140625, + "learning_rate": 5e-06, + "loss": 0.9519, + "num_input_tokens_seen": 374039736, + "step": 2961 + }, + { + "epoch": 0.7595716026422112, + "loss": 0.7220573425292969, + "loss_ce": 0.0013541971566155553, + "loss_iou": 0.34375, + "loss_num": 0.006195068359375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 374039736, + "step": 2961 + }, + { + "epoch": 0.7598281280061566, + "grad_norm": 44.762752532958984, + "learning_rate": 5e-06, + "loss": 0.8812, + "num_input_tokens_seen": 374165556, + "step": 2962 + }, + { + "epoch": 0.7598281280061566, + "loss": 1.0148217678070068, + "loss_ce": 0.0006616117316298187, + "loss_iou": 0.453125, + "loss_num": 0.0213623046875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 374165556, + "step": 2962 + }, + { + "epoch": 0.760084653370102, + "grad_norm": 46.556514739990234, + "learning_rate": 5e-06, + "loss": 0.948, + "num_input_tokens_seen": 374292376, + "step": 2963 + }, + { + "epoch": 0.760084653370102, + "loss": 1.0515354871749878, + "loss_ce": 0.0022190194576978683, + "loss_iou": 0.4609375, + "loss_num": 0.0252685546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 374292376, + "step": 2963 + }, + { + "epoch": 0.7603411787340473, + "grad_norm": 36.3901481628418, + "learning_rate": 5e-06, + "loss": 0.9882, + "num_input_tokens_seen": 374419420, + "step": 2964 + }, + { + "epoch": 0.7603411787340473, + "loss": 0.9720080494880676, + "loss_ce": 0.00032833623117767274, + "loss_iou": 0.458984375, + "loss_num": 0.01068115234375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 374419420, + "step": 2964 + }, + { + "epoch": 0.7605977040979927, + "grad_norm": 42.79838562011719, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 374545548, + "step": 2965 + }, + { + "epoch": 0.7605977040979927, + "loss": 0.9185129404067993, + "loss_ce": 0.0034738441463559866, + "loss_iou": 0.423828125, + "loss_num": 0.013671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 374545548, + "step": 2965 + }, + { + "epoch": 0.760854229461938, + "grad_norm": 52.98666763305664, + "learning_rate": 5e-06, + "loss": 0.9971, + "num_input_tokens_seen": 374671960, + "step": 2966 + }, + { + "epoch": 0.760854229461938, + "loss": 1.0364844799041748, + "loss_ce": 0.001816516974940896, + "loss_iou": 0.462890625, + "loss_num": 0.0218505859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 374671960, + "step": 2966 + }, + { + "epoch": 0.7611107548258834, + "grad_norm": 42.50741958618164, + "learning_rate": 5e-06, + "loss": 0.8961, + "num_input_tokens_seen": 374797460, + "step": 2967 + }, + { + "epoch": 0.7611107548258834, + "loss": 1.0140774250030518, + "loss_ce": 0.0011380409123376012, + "loss_iou": 0.4609375, + "loss_num": 0.01806640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 374797460, + "step": 2967 + }, + { + "epoch": 0.7613672801898288, + "grad_norm": 30.078460693359375, + "learning_rate": 5e-06, + "loss": 0.881, + "num_input_tokens_seen": 374924040, + "step": 2968 + }, + { + "epoch": 0.7613672801898288, + "loss": 0.8513280153274536, + "loss_ce": 0.0017186026088893414, + "loss_iou": 0.390625, + "loss_num": 0.01324462890625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 374924040, + "step": 2968 + }, + { + "epoch": 0.7616238055537742, + "grad_norm": 39.2130241394043, + "learning_rate": 5e-06, + "loss": 0.9402, + "num_input_tokens_seen": 375050048, + "step": 2969 + }, + { + "epoch": 0.7616238055537742, + "loss": 1.0690040588378906, + "loss_ce": 0.0006447683554142714, + "loss_iou": 0.48046875, + "loss_num": 0.0216064453125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 375050048, + "step": 2969 + }, + { + "epoch": 0.7618803309177195, + "grad_norm": 62.42493438720703, + "learning_rate": 5e-06, + "loss": 0.905, + "num_input_tokens_seen": 375176432, + "step": 2970 + }, + { + "epoch": 0.7618803309177195, + "loss": 0.8308520913124084, + "loss_ce": 0.00028565197135321796, + "loss_iou": 0.390625, + "loss_num": 0.00958251953125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 375176432, + "step": 2970 + }, + { + "epoch": 0.7621368562816648, + "grad_norm": 49.771629333496094, + "learning_rate": 5e-06, + "loss": 0.9804, + "num_input_tokens_seen": 375303340, + "step": 2971 + }, + { + "epoch": 0.7621368562816648, + "loss": 1.097997784614563, + "loss_ce": 0.0022947019897401333, + "loss_iou": 0.49609375, + "loss_num": 0.020751953125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 375303340, + "step": 2971 + }, + { + "epoch": 0.7623933816456102, + "grad_norm": 18.602466583251953, + "learning_rate": 5e-06, + "loss": 0.9097, + "num_input_tokens_seen": 375429560, + "step": 2972 + }, + { + "epoch": 0.7623933816456102, + "loss": 0.8840033411979675, + "loss_ce": 0.002411535242572427, + "loss_iou": 0.40625, + "loss_num": 0.01409912109375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 375429560, + "step": 2972 + }, + { + "epoch": 0.7626499070095556, + "grad_norm": 16.01640510559082, + "learning_rate": 5e-06, + "loss": 0.9738, + "num_input_tokens_seen": 375554480, + "step": 2973 + }, + { + "epoch": 0.7626499070095556, + "loss": 0.9353116154670715, + "loss_ce": 0.00025296967942267656, + "loss_iou": 0.4375, + "loss_num": 0.01177978515625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 375554480, + "step": 2973 + }, + { + "epoch": 0.762906432373501, + "grad_norm": 43.27749252319336, + "learning_rate": 5e-06, + "loss": 0.882, + "num_input_tokens_seen": 375680216, + "step": 2974 + }, + { + "epoch": 0.762906432373501, + "loss": 0.8702302575111389, + "loss_ce": 0.0013337796553969383, + "loss_iou": 0.396484375, + "loss_num": 0.01531982421875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 375680216, + "step": 2974 + }, + { + "epoch": 0.7631629577374462, + "grad_norm": 26.667980194091797, + "learning_rate": 5e-06, + "loss": 1.0156, + "num_input_tokens_seen": 375806072, + "step": 2975 + }, + { + "epoch": 0.7631629577374462, + "loss": 0.7538960576057434, + "loss_ce": 0.0024311933666467667, + "loss_iou": 0.357421875, + "loss_num": 0.007537841796875, + "loss_xval": 0.75, + "num_input_tokens_seen": 375806072, + "step": 2975 + }, + { + "epoch": 0.7634194831013916, + "grad_norm": 21.126110076904297, + "learning_rate": 5e-06, + "loss": 0.9138, + "num_input_tokens_seen": 375931316, + "step": 2976 + }, + { + "epoch": 0.7634194831013916, + "loss": 0.8953748941421509, + "loss_ce": 0.0023084767162799835, + "loss_iou": 0.421875, + "loss_num": 0.010009765625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 375931316, + "step": 2976 + }, + { + "epoch": 0.763676008465337, + "grad_norm": 19.52507781982422, + "learning_rate": 5e-06, + "loss": 0.9165, + "num_input_tokens_seen": 376057196, + "step": 2977 + }, + { + "epoch": 0.763676008465337, + "loss": 0.941990852355957, + "loss_ce": 0.007420550100505352, + "loss_iou": 0.419921875, + "loss_num": 0.018798828125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 376057196, + "step": 2977 + }, + { + "epoch": 0.7639325338292824, + "grad_norm": 29.70302391052246, + "learning_rate": 5e-06, + "loss": 0.9372, + "num_input_tokens_seen": 376182612, + "step": 2978 + }, + { + "epoch": 0.7639325338292824, + "loss": 0.8501605987548828, + "loss_ce": 0.0059223473072052, + "loss_iou": 0.3828125, + "loss_num": 0.01519775390625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 376182612, + "step": 2978 + }, + { + "epoch": 0.7641890591932278, + "grad_norm": 53.99999237060547, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 376308192, + "step": 2979 + }, + { + "epoch": 0.7641890591932278, + "loss": 1.1052241325378418, + "loss_ce": 0.0004877225146628916, + "loss_iou": 0.49609375, + "loss_num": 0.02294921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 376308192, + "step": 2979 + }, + { + "epoch": 0.764445584557173, + "grad_norm": 55.934852600097656, + "learning_rate": 5e-06, + "loss": 0.9695, + "num_input_tokens_seen": 376434436, + "step": 2980 + }, + { + "epoch": 0.764445584557173, + "loss": 0.9760003089904785, + "loss_ce": 0.0035882270894944668, + "loss_iou": 0.44140625, + "loss_num": 0.017822265625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 376434436, + "step": 2980 + }, + { + "epoch": 0.7647021099211184, + "grad_norm": 59.983070373535156, + "learning_rate": 5e-06, + "loss": 1.0775, + "num_input_tokens_seen": 376561456, + "step": 2981 + }, + { + "epoch": 0.7647021099211184, + "loss": 1.0145649909973145, + "loss_ce": 0.0008931195479817688, + "loss_iou": 0.466796875, + "loss_num": 0.0162353515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 376561456, + "step": 2981 + }, + { + "epoch": 0.7649586352850638, + "grad_norm": 46.19498825073242, + "learning_rate": 5e-06, + "loss": 1.0677, + "num_input_tokens_seen": 376687132, + "step": 2982 + }, + { + "epoch": 0.7649586352850638, + "loss": 1.0763161182403564, + "loss_ce": 0.0006325061549432576, + "loss_iou": 0.50390625, + "loss_num": 0.0145263671875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 376687132, + "step": 2982 + }, + { + "epoch": 0.7652151606490092, + "grad_norm": 34.18132400512695, + "learning_rate": 5e-06, + "loss": 0.7849, + "num_input_tokens_seen": 376814420, + "step": 2983 + }, + { + "epoch": 0.7652151606490092, + "loss": 0.8837023973464966, + "loss_ce": 0.00088985834736377, + "loss_iou": 0.412109375, + "loss_num": 0.0118408203125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 376814420, + "step": 2983 + }, + { + "epoch": 0.7654716860129546, + "grad_norm": 45.58045196533203, + "learning_rate": 5e-06, + "loss": 1.0549, + "num_input_tokens_seen": 376941352, + "step": 2984 + }, + { + "epoch": 0.7654716860129546, + "loss": 1.245667815208435, + "loss_ce": 0.0044568730518221855, + "loss_iou": 0.54296875, + "loss_num": 0.031494140625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 376941352, + "step": 2984 + }, + { + "epoch": 0.7657282113768998, + "grad_norm": 51.73003387451172, + "learning_rate": 5e-06, + "loss": 0.9674, + "num_input_tokens_seen": 377067576, + "step": 2985 + }, + { + "epoch": 0.7657282113768998, + "loss": 1.0420193672180176, + "loss_ce": 0.005398171953856945, + "loss_iou": 0.466796875, + "loss_num": 0.02099609375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 377067576, + "step": 2985 + }, + { + "epoch": 0.7659847367408452, + "grad_norm": 49.06282424926758, + "learning_rate": 5e-06, + "loss": 0.985, + "num_input_tokens_seen": 377193704, + "step": 2986 + }, + { + "epoch": 0.7659847367408452, + "loss": 0.9154390096664429, + "loss_ce": 0.0033296116162091494, + "loss_iou": 0.4140625, + "loss_num": 0.016845703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 377193704, + "step": 2986 + }, + { + "epoch": 0.7662412621047906, + "grad_norm": 106.10728454589844, + "learning_rate": 5e-06, + "loss": 1.0045, + "num_input_tokens_seen": 377321560, + "step": 2987 + }, + { + "epoch": 0.7662412621047906, + "loss": 0.9279747605323792, + "loss_ce": 0.0004845091898459941, + "loss_iou": 0.431640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 377321560, + "step": 2987 + }, + { + "epoch": 0.766497787468736, + "grad_norm": 41.905357360839844, + "learning_rate": 5e-06, + "loss": 0.9095, + "num_input_tokens_seen": 377447356, + "step": 2988 + }, + { + "epoch": 0.766497787468736, + "loss": 0.9143307209014893, + "loss_ce": 0.0002681694459170103, + "loss_iou": 0.431640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 377447356, + "step": 2988 + }, + { + "epoch": 0.7667543128326814, + "grad_norm": 55.05316925048828, + "learning_rate": 5e-06, + "loss": 0.8834, + "num_input_tokens_seen": 377573516, + "step": 2989 + }, + { + "epoch": 0.7667543128326814, + "loss": 0.9275453686714172, + "loss_ce": 0.001764070475474, + "loss_iou": 0.427734375, + "loss_num": 0.01397705078125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 377573516, + "step": 2989 + }, + { + "epoch": 0.7670108381966267, + "grad_norm": 35.71797561645508, + "learning_rate": 5e-06, + "loss": 1.0149, + "num_input_tokens_seen": 377699840, + "step": 2990 + }, + { + "epoch": 0.7670108381966267, + "loss": 0.9864544868469238, + "loss_ce": 0.0035442793741822243, + "loss_iou": 0.44140625, + "loss_num": 0.0203857421875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 377699840, + "step": 2990 + }, + { + "epoch": 0.767267363560572, + "grad_norm": 49.32024383544922, + "learning_rate": 5e-06, + "loss": 0.8691, + "num_input_tokens_seen": 377827176, + "step": 2991 + }, + { + "epoch": 0.767267363560572, + "loss": 0.8510367274284363, + "loss_ce": 0.00045078134280629456, + "loss_iou": 0.400390625, + "loss_num": 0.01007080078125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 377827176, + "step": 2991 + }, + { + "epoch": 0.7675238889245174, + "grad_norm": 51.15583801269531, + "learning_rate": 5e-06, + "loss": 0.9877, + "num_input_tokens_seen": 377953664, + "step": 2992 + }, + { + "epoch": 0.7675238889245174, + "loss": 0.9211035966873169, + "loss_ce": 0.00020508574380073696, + "loss_iou": 0.43359375, + "loss_num": 0.01116943359375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 377953664, + "step": 2992 + }, + { + "epoch": 0.7677804142884628, + "grad_norm": 40.09506607055664, + "learning_rate": 5e-06, + "loss": 0.9218, + "num_input_tokens_seen": 378079844, + "step": 2993 + }, + { + "epoch": 0.7677804142884628, + "loss": 0.9057847261428833, + "loss_ce": 0.0005113178631290793, + "loss_iou": 0.419921875, + "loss_num": 0.0128173828125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 378079844, + "step": 2993 + }, + { + "epoch": 0.7680369396524082, + "grad_norm": 55.21718978881836, + "learning_rate": 5e-06, + "loss": 0.8465, + "num_input_tokens_seen": 378207008, + "step": 2994 + }, + { + "epoch": 0.7680369396524082, + "loss": 0.7604407072067261, + "loss_ce": 0.0001868056715466082, + "loss_iou": 0.357421875, + "loss_num": 0.0087890625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 378207008, + "step": 2994 + }, + { + "epoch": 0.7682934650163535, + "grad_norm": 73.54366302490234, + "learning_rate": 5e-06, + "loss": 1.0032, + "num_input_tokens_seen": 378333932, + "step": 2995 + }, + { + "epoch": 0.7682934650163535, + "loss": 0.9066866636276245, + "loss_ce": 0.0009249552385881543, + "loss_iou": 0.42578125, + "loss_num": 0.010498046875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 378333932, + "step": 2995 + }, + { + "epoch": 0.7685499903802988, + "grad_norm": 42.544898986816406, + "learning_rate": 5e-06, + "loss": 0.9589, + "num_input_tokens_seen": 378459972, + "step": 2996 + }, + { + "epoch": 0.7685499903802988, + "loss": 0.911292552947998, + "loss_ce": 0.0006479885196313262, + "loss_iou": 0.4296875, + "loss_num": 0.0103759765625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 378459972, + "step": 2996 + }, + { + "epoch": 0.7688065157442442, + "grad_norm": 34.70707321166992, + "learning_rate": 5e-06, + "loss": 0.9024, + "num_input_tokens_seen": 378586588, + "step": 2997 + }, + { + "epoch": 0.7688065157442442, + "loss": 0.7597208619117737, + "loss_ce": 0.0006876660045236349, + "loss_iou": 0.34375, + "loss_num": 0.01422119140625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 378586588, + "step": 2997 + }, + { + "epoch": 0.7690630411081896, + "grad_norm": 69.78291320800781, + "learning_rate": 5e-06, + "loss": 0.9562, + "num_input_tokens_seen": 378713680, + "step": 2998 + }, + { + "epoch": 0.7690630411081896, + "loss": 0.9241538047790527, + "loss_ce": 0.002278837375342846, + "loss_iou": 0.419921875, + "loss_num": 0.01611328125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 378713680, + "step": 2998 + }, + { + "epoch": 0.769319566472135, + "grad_norm": 39.56076431274414, + "learning_rate": 5e-06, + "loss": 0.9887, + "num_input_tokens_seen": 378839204, + "step": 2999 + }, + { + "epoch": 0.769319566472135, + "loss": 0.9081380367279053, + "loss_ce": 0.00042316113831475377, + "loss_iou": 0.423828125, + "loss_num": 0.01165771484375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 378839204, + "step": 2999 + }, + { + "epoch": 0.7695760918360803, + "grad_norm": 26.200607299804688, + "learning_rate": 5e-06, + "loss": 1.0636, + "num_input_tokens_seen": 378965548, + "step": 3000 + }, + { + "epoch": 0.7695760918360803, + "eval_icons_CIoU": 0.2715916112065315, + "eval_icons_GIoU": 0.22925589978694916, + "eval_icons_IoU": 0.44085457921028137, + "eval_icons_MAE_all": 0.025633021257817745, + "eval_icons_MAE_h": 0.03409300930798054, + "eval_icons_MAE_w": 0.05155266635119915, + "eval_icons_MAE_x_boxes": 0.051800886169075966, + "eval_icons_MAE_y_boxes": 0.034653183072805405, + "eval_icons_NUM_probability": 0.9998824000358582, + "eval_icons_inside_bbox": 0.6805555522441864, + "eval_icons_loss": 1.6419742107391357, + "eval_icons_loss_ce": 8.371191324840765e-05, + "eval_icons_loss_iou": 0.736083984375, + "eval_icons_loss_num": 0.026887893676757812, + "eval_icons_loss_xval": 1.60693359375, + "eval_icons_runtime": 48.0868, + "eval_icons_samples_per_second": 1.04, + "eval_icons_steps_per_second": 0.042, + "num_input_tokens_seen": 378965548, + "step": 3000 + }, + { + "epoch": 0.7695760918360803, + "eval_screenspot_CIoU": 0.11348300178845723, + "eval_screenspot_GIoU": 0.093210119754076, + "eval_screenspot_IoU": 0.2794196556011836, + "eval_screenspot_MAE_all": 0.07668468977014224, + "eval_screenspot_MAE_h": 0.07430399705966313, + "eval_screenspot_MAE_w": 0.11935861160357793, + "eval_screenspot_MAE_x_boxes": 0.09589457263549168, + "eval_screenspot_MAE_y_boxes": 0.05734619374076525, + "eval_screenspot_NUM_probability": 0.9999253153800964, + "eval_screenspot_inside_bbox": 0.6016666690508524, + "eval_screenspot_loss": 2.2438766956329346, + "eval_screenspot_loss_ce": 0.0013917963951826096, + "eval_screenspot_loss_iou": 0.9330240885416666, + "eval_screenspot_loss_num": 0.0814208984375, + "eval_screenspot_loss_xval": 2.2721354166666665, + "eval_screenspot_runtime": 89.7916, + "eval_screenspot_samples_per_second": 0.991, + "eval_screenspot_steps_per_second": 0.033, + "num_input_tokens_seen": 378965548, + "step": 3000 + }, + { + "epoch": 0.7695760918360803, + "loss": 2.152388095855713, + "loss_ce": 0.0010210180189460516, + "loss_iou": 0.90234375, + "loss_num": 0.0693359375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 378965548, + "step": 3000 + }, + { + "epoch": 0.7698326172000256, + "grad_norm": 39.61350631713867, + "learning_rate": 5e-06, + "loss": 0.991, + "num_input_tokens_seen": 379091584, + "step": 3001 + }, + { + "epoch": 0.7698326172000256, + "loss": 1.0921118259429932, + "loss_ce": 0.001779855927452445, + "loss_iou": 0.51953125, + "loss_num": 0.01007080078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 379091584, + "step": 3001 + }, + { + "epoch": 0.770089142563971, + "grad_norm": 37.6118278503418, + "learning_rate": 5e-06, + "loss": 0.8713, + "num_input_tokens_seen": 379218960, + "step": 3002 + }, + { + "epoch": 0.770089142563971, + "loss": 0.7587677240371704, + "loss_ce": 0.0004669131012633443, + "loss_iou": 0.359375, + "loss_num": 0.007598876953125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 379218960, + "step": 3002 + }, + { + "epoch": 0.7703456679279164, + "grad_norm": 50.787174224853516, + "learning_rate": 5e-06, + "loss": 0.9134, + "num_input_tokens_seen": 379344380, + "step": 3003 + }, + { + "epoch": 0.7703456679279164, + "loss": 1.0935838222503662, + "loss_ce": 0.0022752871736884117, + "loss_iou": 0.49609375, + "loss_num": 0.01953125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 379344380, + "step": 3003 + }, + { + "epoch": 0.7706021932918617, + "grad_norm": 48.33881378173828, + "learning_rate": 5e-06, + "loss": 0.9195, + "num_input_tokens_seen": 379470860, + "step": 3004 + }, + { + "epoch": 0.7706021932918617, + "loss": 0.8394947052001953, + "loss_ce": 0.003068935591727495, + "loss_iou": 0.392578125, + "loss_num": 0.0103759765625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 379470860, + "step": 3004 + }, + { + "epoch": 0.7708587186558071, + "grad_norm": 42.270172119140625, + "learning_rate": 5e-06, + "loss": 0.9303, + "num_input_tokens_seen": 379595800, + "step": 3005 + }, + { + "epoch": 0.7708587186558071, + "loss": 0.9560731053352356, + "loss_ce": 0.0005066974554210901, + "loss_iou": 0.4375, + "loss_num": 0.015625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 379595800, + "step": 3005 + }, + { + "epoch": 0.7711152440197524, + "grad_norm": 48.07954025268555, + "learning_rate": 5e-06, + "loss": 0.8251, + "num_input_tokens_seen": 379722584, + "step": 3006 + }, + { + "epoch": 0.7711152440197524, + "loss": 0.8347651362419128, + "loss_ce": 0.0002924787113443017, + "loss_iou": 0.39453125, + "loss_num": 0.00872802734375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 379722584, + "step": 3006 + }, + { + "epoch": 0.7713717693836978, + "grad_norm": 67.18219757080078, + "learning_rate": 5e-06, + "loss": 1.0236, + "num_input_tokens_seen": 379848936, + "step": 3007 + }, + { + "epoch": 0.7713717693836978, + "loss": 0.8696813583374023, + "loss_ce": 0.0007848716923035681, + "loss_iou": 0.40625, + "loss_num": 0.0115966796875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 379848936, + "step": 3007 + }, + { + "epoch": 0.7716282947476432, + "grad_norm": 59.21671676635742, + "learning_rate": 5e-06, + "loss": 1.0638, + "num_input_tokens_seen": 379977028, + "step": 3008 + }, + { + "epoch": 0.7716282947476432, + "loss": 1.0312280654907227, + "loss_ce": 0.00046638547792099416, + "loss_iou": 0.482421875, + "loss_num": 0.012939453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 379977028, + "step": 3008 + }, + { + "epoch": 0.7718848201115885, + "grad_norm": 52.688758850097656, + "learning_rate": 5e-06, + "loss": 0.8988, + "num_input_tokens_seen": 380103124, + "step": 3009 + }, + { + "epoch": 0.7718848201115885, + "loss": 0.8675566911697388, + "loss_ce": 0.00036926561733707786, + "loss_iou": 0.416015625, + "loss_num": 0.007080078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 380103124, + "step": 3009 + }, + { + "epoch": 0.7721413454755339, + "grad_norm": 42.36954116821289, + "learning_rate": 5e-06, + "loss": 0.9485, + "num_input_tokens_seen": 380229504, + "step": 3010 + }, + { + "epoch": 0.7721413454755339, + "loss": 0.766742467880249, + "loss_ce": 0.0001409777905791998, + "loss_iou": 0.3671875, + "loss_num": 0.00628662109375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 380229504, + "step": 3010 + }, + { + "epoch": 0.7723978708394793, + "grad_norm": 28.404325485229492, + "learning_rate": 5e-06, + "loss": 0.9648, + "num_input_tokens_seen": 380356656, + "step": 3011 + }, + { + "epoch": 0.7723978708394793, + "loss": 0.9414765238761902, + "loss_ce": 0.0005585274193435907, + "loss_iou": 0.443359375, + "loss_num": 0.010498046875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 380356656, + "step": 3011 + }, + { + "epoch": 0.7726543962034246, + "grad_norm": 41.81505584716797, + "learning_rate": 5e-06, + "loss": 0.9214, + "num_input_tokens_seen": 380483164, + "step": 3012 + }, + { + "epoch": 0.7726543962034246, + "loss": 0.8766902685165405, + "loss_ce": 0.0002254076098324731, + "loss_iou": 0.416015625, + "loss_num": 0.009033203125, + "loss_xval": 0.875, + "num_input_tokens_seen": 380483164, + "step": 3012 + }, + { + "epoch": 0.77291092156737, + "grad_norm": 53.625797271728516, + "learning_rate": 5e-06, + "loss": 1.0151, + "num_input_tokens_seen": 380609572, + "step": 3013 + }, + { + "epoch": 0.77291092156737, + "loss": 1.062394142150879, + "loss_ce": 0.0018473234958946705, + "loss_iou": 0.46875, + "loss_num": 0.02392578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 380609572, + "step": 3013 + }, + { + "epoch": 0.7731674469313153, + "grad_norm": 48.934505462646484, + "learning_rate": 5e-06, + "loss": 0.9882, + "num_input_tokens_seen": 380735436, + "step": 3014 + }, + { + "epoch": 0.7731674469313153, + "loss": 1.048119306564331, + "loss_ce": 0.0012443226296454668, + "loss_iou": 0.470703125, + "loss_num": 0.0208740234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 380735436, + "step": 3014 + }, + { + "epoch": 0.7734239722952607, + "grad_norm": 27.41706085205078, + "learning_rate": 5e-06, + "loss": 0.9804, + "num_input_tokens_seen": 380861936, + "step": 3015 + }, + { + "epoch": 0.7734239722952607, + "loss": 0.9805446863174438, + "loss_ce": 0.0005642024334520102, + "loss_iou": 0.443359375, + "loss_num": 0.018798828125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 380861936, + "step": 3015 + }, + { + "epoch": 0.7736804976592061, + "grad_norm": 36.12276077270508, + "learning_rate": 5e-06, + "loss": 0.9816, + "num_input_tokens_seen": 380986832, + "step": 3016 + }, + { + "epoch": 0.7736804976592061, + "loss": 1.3909986019134521, + "loss_ce": 0.005744654685258865, + "loss_iou": 0.59375, + "loss_num": 0.039794921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 380986832, + "step": 3016 + }, + { + "epoch": 0.7739370230231514, + "grad_norm": 44.791900634765625, + "learning_rate": 5e-06, + "loss": 0.963, + "num_input_tokens_seen": 381112624, + "step": 3017 + }, + { + "epoch": 0.7739370230231514, + "loss": 1.0637054443359375, + "loss_ce": 0.0012053779792040586, + "loss_iou": 0.458984375, + "loss_num": 0.0286865234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 381112624, + "step": 3017 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 63.07220458984375, + "learning_rate": 5e-06, + "loss": 0.9542, + "num_input_tokens_seen": 381239928, + "step": 3018 + }, + { + "epoch": 0.7741935483870968, + "loss": 0.9526422023773193, + "loss_ce": 0.0004937045741826296, + "loss_iou": 0.447265625, + "loss_num": 0.011962890625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 381239928, + "step": 3018 + }, + { + "epoch": 0.7744500737510421, + "grad_norm": 76.1841812133789, + "learning_rate": 5e-06, + "loss": 0.8689, + "num_input_tokens_seen": 381366424, + "step": 3019 + }, + { + "epoch": 0.7744500737510421, + "loss": 0.8687546849250793, + "loss_ce": 0.0001023576915031299, + "loss_iou": 0.419921875, + "loss_num": 0.005645751953125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 381366424, + "step": 3019 + }, + { + "epoch": 0.7747065991149875, + "grad_norm": 62.443397521972656, + "learning_rate": 5e-06, + "loss": 1.1276, + "num_input_tokens_seen": 381492424, + "step": 3020 + }, + { + "epoch": 0.7747065991149875, + "loss": 1.1721076965332031, + "loss_ce": 0.00023272990074474365, + "loss_iou": 0.546875, + "loss_num": 0.01611328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 381492424, + "step": 3020 + }, + { + "epoch": 0.7749631244789329, + "grad_norm": 17.240564346313477, + "learning_rate": 5e-06, + "loss": 0.9824, + "num_input_tokens_seen": 381618560, + "step": 3021 + }, + { + "epoch": 0.7749631244789329, + "loss": 0.6908792853355408, + "loss_ce": 0.00044960560626350343, + "loss_iou": 0.330078125, + "loss_num": 0.00604248046875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 381618560, + "step": 3021 + }, + { + "epoch": 0.7752196498428782, + "grad_norm": 39.995811462402344, + "learning_rate": 5e-06, + "loss": 0.8494, + "num_input_tokens_seen": 381745728, + "step": 3022 + }, + { + "epoch": 0.7752196498428782, + "loss": 0.7599807381629944, + "loss_ce": 0.0016799264121800661, + "loss_iou": 0.3515625, + "loss_num": 0.0108642578125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 381745728, + "step": 3022 + }, + { + "epoch": 0.7754761752068235, + "grad_norm": 69.50076293945312, + "learning_rate": 5e-06, + "loss": 0.9594, + "num_input_tokens_seen": 381872328, + "step": 3023 + }, + { + "epoch": 0.7754761752068235, + "loss": 0.9520664215087891, + "loss_ce": 0.0033359513618052006, + "loss_iou": 0.443359375, + "loss_num": 0.01263427734375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 381872328, + "step": 3023 + }, + { + "epoch": 0.7757327005707689, + "grad_norm": 34.990447998046875, + "learning_rate": 5e-06, + "loss": 1.0454, + "num_input_tokens_seen": 381997828, + "step": 3024 + }, + { + "epoch": 0.7757327005707689, + "loss": 1.1258909702301025, + "loss_ce": 0.000890886876732111, + "loss_iou": 0.515625, + "loss_num": 0.0194091796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 381997828, + "step": 3024 + }, + { + "epoch": 0.7759892259347143, + "grad_norm": 34.07530975341797, + "learning_rate": 5e-06, + "loss": 0.8934, + "num_input_tokens_seen": 382123876, + "step": 3025 + }, + { + "epoch": 0.7759892259347143, + "loss": 0.9310852289199829, + "loss_ce": 0.008233648724853992, + "loss_iou": 0.431640625, + "loss_num": 0.0115966796875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 382123876, + "step": 3025 + }, + { + "epoch": 0.7762457512986597, + "grad_norm": 57.07090377807617, + "learning_rate": 5e-06, + "loss": 0.8994, + "num_input_tokens_seen": 382250488, + "step": 3026 + }, + { + "epoch": 0.7762457512986597, + "loss": 0.9241904020309448, + "loss_ce": 0.003291929606348276, + "loss_iou": 0.4296875, + "loss_num": 0.01190185546875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 382250488, + "step": 3026 + }, + { + "epoch": 0.776502276662605, + "grad_norm": 37.11333084106445, + "learning_rate": 5e-06, + "loss": 0.8982, + "num_input_tokens_seen": 382377808, + "step": 3027 + }, + { + "epoch": 0.776502276662605, + "loss": 1.0019052028656006, + "loss_ce": 0.0006844633026048541, + "loss_iou": 0.46875, + "loss_num": 0.01239013671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 382377808, + "step": 3027 + }, + { + "epoch": 0.7767588020265503, + "grad_norm": 26.464027404785156, + "learning_rate": 5e-06, + "loss": 0.8721, + "num_input_tokens_seen": 382503112, + "step": 3028 + }, + { + "epoch": 0.7767588020265503, + "loss": 0.893409013748169, + "loss_ce": 0.00034256139770150185, + "loss_iou": 0.41796875, + "loss_num": 0.01165771484375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 382503112, + "step": 3028 + }, + { + "epoch": 0.7770153273904957, + "grad_norm": 38.20924758911133, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 382629792, + "step": 3029 + }, + { + "epoch": 0.7770153273904957, + "loss": 0.8053667545318604, + "loss_ce": 0.00019098969642072916, + "loss_iou": 0.3828125, + "loss_num": 0.00799560546875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 382629792, + "step": 3029 + }, + { + "epoch": 0.7772718527544411, + "grad_norm": 51.976417541503906, + "learning_rate": 5e-06, + "loss": 1.0032, + "num_input_tokens_seen": 382756156, + "step": 3030 + }, + { + "epoch": 0.7772718527544411, + "loss": 0.9837273955345154, + "loss_ce": 0.0013055421877652407, + "loss_iou": 0.44921875, + "loss_num": 0.017333984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 382756156, + "step": 3030 + }, + { + "epoch": 0.7775283781183865, + "grad_norm": 62.554107666015625, + "learning_rate": 5e-06, + "loss": 0.9092, + "num_input_tokens_seen": 382883032, + "step": 3031 + }, + { + "epoch": 0.7775283781183865, + "loss": 0.8148177862167358, + "loss_ce": 0.0023177447728812695, + "loss_iou": 0.380859375, + "loss_num": 0.01055908203125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 382883032, + "step": 3031 + }, + { + "epoch": 0.7777849034823319, + "grad_norm": 47.41301727294922, + "learning_rate": 5e-06, + "loss": 0.914, + "num_input_tokens_seen": 383009900, + "step": 3032 + }, + { + "epoch": 0.7777849034823319, + "loss": 0.7995867729187012, + "loss_ce": 0.00027039897395297885, + "loss_iou": 0.380859375, + "loss_num": 0.00738525390625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 383009900, + "step": 3032 + }, + { + "epoch": 0.7780414288462771, + "grad_norm": 28.284042358398438, + "learning_rate": 5e-06, + "loss": 0.9561, + "num_input_tokens_seen": 383136588, + "step": 3033 + }, + { + "epoch": 0.7780414288462771, + "loss": 0.9323970079421997, + "loss_ce": 0.0010005261283367872, + "loss_iou": 0.4296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 383136588, + "step": 3033 + }, + { + "epoch": 0.7782979542102225, + "grad_norm": 22.20311164855957, + "learning_rate": 5e-06, + "loss": 0.8834, + "num_input_tokens_seen": 383263676, + "step": 3034 + }, + { + "epoch": 0.7782979542102225, + "loss": 1.0042827129364014, + "loss_ce": 0.0011088561732321978, + "loss_iou": 0.45703125, + "loss_num": 0.017578125, + "loss_xval": 1.0, + "num_input_tokens_seen": 383263676, + "step": 3034 + }, + { + "epoch": 0.7785544795741679, + "grad_norm": 15.141229629516602, + "learning_rate": 5e-06, + "loss": 0.8124, + "num_input_tokens_seen": 383389164, + "step": 3035 + }, + { + "epoch": 0.7785544795741679, + "loss": 0.8462780714035034, + "loss_ce": 0.0010631745681166649, + "loss_iou": 0.39453125, + "loss_num": 0.0108642578125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 383389164, + "step": 3035 + }, + { + "epoch": 0.7788110049381133, + "grad_norm": 35.572723388671875, + "learning_rate": 5e-06, + "loss": 0.9491, + "num_input_tokens_seen": 383515416, + "step": 3036 + }, + { + "epoch": 0.7788110049381133, + "loss": 0.9321386814117432, + "loss_ce": 0.0009863422019407153, + "loss_iou": 0.41796875, + "loss_num": 0.01904296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 383515416, + "step": 3036 + }, + { + "epoch": 0.7790675303020587, + "grad_norm": 67.17974090576172, + "learning_rate": 5e-06, + "loss": 0.91, + "num_input_tokens_seen": 383642408, + "step": 3037 + }, + { + "epoch": 0.7790675303020587, + "loss": 0.944837212562561, + "loss_ce": 0.0005012737237848341, + "loss_iou": 0.4375, + "loss_num": 0.013916015625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 383642408, + "step": 3037 + }, + { + "epoch": 0.7793240556660039, + "grad_norm": 50.37013244628906, + "learning_rate": 5e-06, + "loss": 1.0588, + "num_input_tokens_seen": 383769316, + "step": 3038 + }, + { + "epoch": 0.7793240556660039, + "loss": 1.0241467952728271, + "loss_ce": 0.006080355029553175, + "loss_iou": 0.455078125, + "loss_num": 0.0216064453125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 383769316, + "step": 3038 + }, + { + "epoch": 0.7795805810299493, + "grad_norm": 19.267210006713867, + "learning_rate": 5e-06, + "loss": 0.9066, + "num_input_tokens_seen": 383896128, + "step": 3039 + }, + { + "epoch": 0.7795805810299493, + "loss": 0.9568158388137817, + "loss_ce": 0.0002728258550632745, + "loss_iou": 0.451171875, + "loss_num": 0.01055908203125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 383896128, + "step": 3039 + }, + { + "epoch": 0.7798371063938947, + "grad_norm": 24.765140533447266, + "learning_rate": 5e-06, + "loss": 0.9509, + "num_input_tokens_seen": 384022236, + "step": 3040 + }, + { + "epoch": 0.7798371063938947, + "loss": 1.1795620918273926, + "loss_ce": 0.001827769330702722, + "loss_iou": 0.52734375, + "loss_num": 0.0238037109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 384022236, + "step": 3040 + }, + { + "epoch": 0.7800936317578401, + "grad_norm": 88.75298309326172, + "learning_rate": 5e-06, + "loss": 0.8898, + "num_input_tokens_seen": 384149364, + "step": 3041 + }, + { + "epoch": 0.7800936317578401, + "loss": 0.914636492729187, + "loss_ce": 8.572454680688679e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0125732421875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 384149364, + "step": 3041 + }, + { + "epoch": 0.7803501571217855, + "grad_norm": 31.261775970458984, + "learning_rate": 5e-06, + "loss": 0.8775, + "num_input_tokens_seen": 384275644, + "step": 3042 + }, + { + "epoch": 0.7803501571217855, + "loss": 0.985925018787384, + "loss_ce": 0.0020382849033921957, + "loss_iou": 0.43359375, + "loss_num": 0.02294921875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 384275644, + "step": 3042 + }, + { + "epoch": 0.7806066824857307, + "grad_norm": 35.07978057861328, + "learning_rate": 5e-06, + "loss": 0.9951, + "num_input_tokens_seen": 384402040, + "step": 3043 + }, + { + "epoch": 0.7806066824857307, + "loss": 0.9254921078681946, + "loss_ce": 0.0006873985403217375, + "loss_iou": 0.427734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 384402040, + "step": 3043 + }, + { + "epoch": 0.7808632078496761, + "grad_norm": 35.637516021728516, + "learning_rate": 5e-06, + "loss": 0.9422, + "num_input_tokens_seen": 384528144, + "step": 3044 + }, + { + "epoch": 0.7808632078496761, + "loss": 0.9885154962539673, + "loss_ce": 0.00023422783124260604, + "loss_iou": 0.455078125, + "loss_num": 0.015869140625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 384528144, + "step": 3044 + }, + { + "epoch": 0.7811197332136215, + "grad_norm": 38.63658905029297, + "learning_rate": 5e-06, + "loss": 0.911, + "num_input_tokens_seen": 384653276, + "step": 3045 + }, + { + "epoch": 0.7811197332136215, + "loss": 0.9750688076019287, + "loss_ce": 0.0019242276903241873, + "loss_iou": 0.453125, + "loss_num": 0.0133056640625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 384653276, + "step": 3045 + }, + { + "epoch": 0.7813762585775669, + "grad_norm": 41.50048828125, + "learning_rate": 5e-06, + "loss": 1.0844, + "num_input_tokens_seen": 384778668, + "step": 3046 + }, + { + "epoch": 0.7813762585775669, + "loss": 1.0076630115509033, + "loss_ce": 0.0003388008917681873, + "loss_iou": 0.458984375, + "loss_num": 0.01806640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 384778668, + "step": 3046 + }, + { + "epoch": 0.7816327839415123, + "grad_norm": 39.8167839050293, + "learning_rate": 5e-06, + "loss": 0.7765, + "num_input_tokens_seen": 384905076, + "step": 3047 + }, + { + "epoch": 0.7816327839415123, + "loss": 0.7993919253349304, + "loss_ce": 0.0005637963768094778, + "loss_iou": 0.369140625, + "loss_num": 0.0123291015625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 384905076, + "step": 3047 + }, + { + "epoch": 0.7818893093054575, + "grad_norm": 42.49872589111328, + "learning_rate": 5e-06, + "loss": 0.9302, + "num_input_tokens_seen": 385031044, + "step": 3048 + }, + { + "epoch": 0.7818893093054575, + "loss": 0.975603461265564, + "loss_ce": 0.001970694400370121, + "loss_iou": 0.423828125, + "loss_num": 0.025146484375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 385031044, + "step": 3048 + }, + { + "epoch": 0.7821458346694029, + "grad_norm": 46.245094299316406, + "learning_rate": 5e-06, + "loss": 1.0241, + "num_input_tokens_seen": 385155604, + "step": 3049 + }, + { + "epoch": 0.7821458346694029, + "loss": 0.9808222055435181, + "loss_ce": 0.0027949195355176926, + "loss_iou": 0.4609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 385155604, + "step": 3049 + }, + { + "epoch": 0.7824023600333483, + "grad_norm": 49.91230773925781, + "learning_rate": 5e-06, + "loss": 0.9161, + "num_input_tokens_seen": 385281284, + "step": 3050 + }, + { + "epoch": 0.7824023600333483, + "loss": 0.9461783170700073, + "loss_ce": 0.0013540246291086078, + "loss_iou": 0.44140625, + "loss_num": 0.012451171875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 385281284, + "step": 3050 + }, + { + "epoch": 0.7826588853972937, + "grad_norm": 58.8674430847168, + "learning_rate": 5e-06, + "loss": 1.0414, + "num_input_tokens_seen": 385406940, + "step": 3051 + }, + { + "epoch": 0.7826588853972937, + "loss": 0.7682812809944153, + "loss_ce": 0.001679693814367056, + "loss_iou": 0.357421875, + "loss_num": 0.01031494140625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 385406940, + "step": 3051 + }, + { + "epoch": 0.782915410761239, + "grad_norm": 51.56389236450195, + "learning_rate": 5e-06, + "loss": 1.0213, + "num_input_tokens_seen": 385533600, + "step": 3052 + }, + { + "epoch": 0.782915410761239, + "loss": 0.9266822338104248, + "loss_ce": 0.0050513967871665955, + "loss_iou": 0.419921875, + "loss_num": 0.016357421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 385533600, + "step": 3052 + }, + { + "epoch": 0.7831719361251844, + "grad_norm": 106.46561431884766, + "learning_rate": 5e-06, + "loss": 0.9104, + "num_input_tokens_seen": 385660204, + "step": 3053 + }, + { + "epoch": 0.7831719361251844, + "loss": 0.9358047246932983, + "loss_ce": 0.0031875246204435825, + "loss_iou": 0.421875, + "loss_num": 0.017333984375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 385660204, + "step": 3053 + }, + { + "epoch": 0.7834284614891297, + "grad_norm": 46.1313362121582, + "learning_rate": 5e-06, + "loss": 0.9279, + "num_input_tokens_seen": 385786304, + "step": 3054 + }, + { + "epoch": 0.7834284614891297, + "loss": 1.1432561874389648, + "loss_ce": 0.0006780330440960824, + "loss_iou": 0.52734375, + "loss_num": 0.0179443359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 385786304, + "step": 3054 + }, + { + "epoch": 0.7836849868530751, + "grad_norm": 21.14231300354004, + "learning_rate": 5e-06, + "loss": 0.8038, + "num_input_tokens_seen": 385912576, + "step": 3055 + }, + { + "epoch": 0.7836849868530751, + "loss": 0.8215305805206299, + "loss_ce": 0.0014622495509684086, + "loss_iou": 0.380859375, + "loss_num": 0.01153564453125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 385912576, + "step": 3055 + }, + { + "epoch": 0.7839415122170205, + "grad_norm": 34.69921875, + "learning_rate": 5e-06, + "loss": 0.7799, + "num_input_tokens_seen": 386037960, + "step": 3056 + }, + { + "epoch": 0.7839415122170205, + "loss": 0.8710594773292542, + "loss_ce": 0.00045407257857732475, + "loss_iou": 0.40625, + "loss_num": 0.0113525390625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 386037960, + "step": 3056 + }, + { + "epoch": 0.7841980375809658, + "grad_norm": 76.0654067993164, + "learning_rate": 5e-06, + "loss": 0.9109, + "num_input_tokens_seen": 386163468, + "step": 3057 + }, + { + "epoch": 0.7841980375809658, + "loss": 0.9688147306442261, + "loss_ce": 6.476055568782613e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01092529296875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 386163468, + "step": 3057 + }, + { + "epoch": 0.7844545629449112, + "grad_norm": 54.776973724365234, + "learning_rate": 5e-06, + "loss": 0.9482, + "num_input_tokens_seen": 386289100, + "step": 3058 + }, + { + "epoch": 0.7844545629449112, + "loss": 0.8281857967376709, + "loss_ce": 0.0005491084302775562, + "loss_iou": 0.396484375, + "loss_num": 0.0069580078125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 386289100, + "step": 3058 + }, + { + "epoch": 0.7847110883088565, + "grad_norm": 47.83889389038086, + "learning_rate": 5e-06, + "loss": 0.9508, + "num_input_tokens_seen": 386415284, + "step": 3059 + }, + { + "epoch": 0.7847110883088565, + "loss": 0.9336451292037964, + "loss_ce": 0.0005396935739554465, + "loss_iou": 0.42578125, + "loss_num": 0.01611328125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 386415284, + "step": 3059 + }, + { + "epoch": 0.7849676136728019, + "grad_norm": 53.74713897705078, + "learning_rate": 5e-06, + "loss": 0.9436, + "num_input_tokens_seen": 386541896, + "step": 3060 + }, + { + "epoch": 0.7849676136728019, + "loss": 0.8373483419418335, + "loss_ce": 0.0014108092291280627, + "loss_iou": 0.390625, + "loss_num": 0.0111083984375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 386541896, + "step": 3060 + }, + { + "epoch": 0.7852241390367473, + "grad_norm": 55.16197204589844, + "learning_rate": 5e-06, + "loss": 0.9996, + "num_input_tokens_seen": 386668432, + "step": 3061 + }, + { + "epoch": 0.7852241390367473, + "loss": 1.105919361114502, + "loss_ce": 0.0004506285476963967, + "loss_iou": 0.51171875, + "loss_num": 0.016357421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 386668432, + "step": 3061 + }, + { + "epoch": 0.7854806644006926, + "grad_norm": 50.37675857543945, + "learning_rate": 5e-06, + "loss": 0.9933, + "num_input_tokens_seen": 386793896, + "step": 3062 + }, + { + "epoch": 0.7854806644006926, + "loss": 0.8566752672195435, + "loss_ce": 0.00022993976017460227, + "loss_iou": 0.408203125, + "loss_num": 0.00830078125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 386793896, + "step": 3062 + }, + { + "epoch": 0.785737189764638, + "grad_norm": 44.92658615112305, + "learning_rate": 5e-06, + "loss": 0.9512, + "num_input_tokens_seen": 386919852, + "step": 3063 + }, + { + "epoch": 0.785737189764638, + "loss": 1.0447295904159546, + "loss_ce": 0.0012725600972771645, + "loss_iou": 0.484375, + "loss_num": 0.015380859375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 386919852, + "step": 3063 + }, + { + "epoch": 0.7859937151285833, + "grad_norm": 39.00270080566406, + "learning_rate": 5e-06, + "loss": 1.0037, + "num_input_tokens_seen": 387045140, + "step": 3064 + }, + { + "epoch": 0.7859937151285833, + "loss": 1.053246259689331, + "loss_ce": 0.0005117832915857434, + "loss_iou": 0.494140625, + "loss_num": 0.01318359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 387045140, + "step": 3064 + }, + { + "epoch": 0.7862502404925287, + "grad_norm": 34.826873779296875, + "learning_rate": 5e-06, + "loss": 0.8594, + "num_input_tokens_seen": 387171884, + "step": 3065 + }, + { + "epoch": 0.7862502404925287, + "loss": 0.9140823483467102, + "loss_ce": 0.0009963997872546315, + "loss_iou": 0.42578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 387171884, + "step": 3065 + }, + { + "epoch": 0.786506765856474, + "grad_norm": 38.07713317871094, + "learning_rate": 5e-06, + "loss": 0.8119, + "num_input_tokens_seen": 387296856, + "step": 3066 + }, + { + "epoch": 0.786506765856474, + "loss": 0.8891111016273499, + "loss_ce": 0.001415822422131896, + "loss_iou": 0.408203125, + "loss_num": 0.01409912109375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 387296856, + "step": 3066 + }, + { + "epoch": 0.7867632912204194, + "grad_norm": 40.21628189086914, + "learning_rate": 5e-06, + "loss": 0.9851, + "num_input_tokens_seen": 387423016, + "step": 3067 + }, + { + "epoch": 0.7867632912204194, + "loss": 0.9070166349411011, + "loss_ce": 0.0007666609599255025, + "loss_iou": 0.42578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 387423016, + "step": 3067 + }, + { + "epoch": 0.7870198165843648, + "grad_norm": 40.25064468383789, + "learning_rate": 5e-06, + "loss": 0.9425, + "num_input_tokens_seen": 387549604, + "step": 3068 + }, + { + "epoch": 0.7870198165843648, + "loss": 1.142991304397583, + "loss_ce": 0.00798153318464756, + "loss_iou": 0.5, + "loss_num": 0.0269775390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 387549604, + "step": 3068 + }, + { + "epoch": 0.7872763419483101, + "grad_norm": 36.8310546875, + "learning_rate": 5e-06, + "loss": 0.9357, + "num_input_tokens_seen": 387675432, + "step": 3069 + }, + { + "epoch": 0.7872763419483101, + "loss": 0.9660939574241638, + "loss_ce": 0.0046682171523571014, + "loss_iou": 0.439453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 387675432, + "step": 3069 + }, + { + "epoch": 0.7875328673122555, + "grad_norm": 45.4752082824707, + "learning_rate": 5e-06, + "loss": 0.8779, + "num_input_tokens_seen": 387801364, + "step": 3070 + }, + { + "epoch": 0.7875328673122555, + "loss": 0.8811048269271851, + "loss_ce": 0.0007337399292737246, + "loss_iou": 0.40234375, + "loss_num": 0.0147705078125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 387801364, + "step": 3070 + }, + { + "epoch": 0.7877893926762009, + "grad_norm": 57.98426818847656, + "learning_rate": 5e-06, + "loss": 1.0281, + "num_input_tokens_seen": 387927692, + "step": 3071 + }, + { + "epoch": 0.7877893926762009, + "loss": 1.0504921674728394, + "loss_ce": 0.0031288685277104378, + "loss_iou": 0.490234375, + "loss_num": 0.01373291015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 387927692, + "step": 3071 + }, + { + "epoch": 0.7880459180401462, + "grad_norm": 50.42306137084961, + "learning_rate": 5e-06, + "loss": 0.9701, + "num_input_tokens_seen": 388054244, + "step": 3072 + }, + { + "epoch": 0.7880459180401462, + "loss": 1.0604848861694336, + "loss_ce": 0.0043325782753527164, + "loss_iou": 0.466796875, + "loss_num": 0.0240478515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 388054244, + "step": 3072 + }, + { + "epoch": 0.7883024434040916, + "grad_norm": 58.65945816040039, + "learning_rate": 5e-06, + "loss": 0.928, + "num_input_tokens_seen": 388180224, + "step": 3073 + }, + { + "epoch": 0.7883024434040916, + "loss": 0.846748948097229, + "loss_ce": 0.0008016748470254242, + "loss_iou": 0.404296875, + "loss_num": 0.007659912109375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 388180224, + "step": 3073 + }, + { + "epoch": 0.788558968768037, + "grad_norm": 53.31142807006836, + "learning_rate": 5e-06, + "loss": 1.012, + "num_input_tokens_seen": 388306492, + "step": 3074 + }, + { + "epoch": 0.788558968768037, + "loss": 0.9080705642700195, + "loss_ce": 0.000355702533852309, + "loss_iou": 0.41796875, + "loss_num": 0.0146484375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 388306492, + "step": 3074 + }, + { + "epoch": 0.7888154941319823, + "grad_norm": 39.9885139465332, + "learning_rate": 5e-06, + "loss": 0.9417, + "num_input_tokens_seen": 388432928, + "step": 3075 + }, + { + "epoch": 0.7888154941319823, + "loss": 0.9045344591140747, + "loss_ce": 0.00023756037990096956, + "loss_iou": 0.41015625, + "loss_num": 0.0166015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 388432928, + "step": 3075 + }, + { + "epoch": 0.7890720194959276, + "grad_norm": 55.082977294921875, + "learning_rate": 5e-06, + "loss": 1.009, + "num_input_tokens_seen": 388559860, + "step": 3076 + }, + { + "epoch": 0.7890720194959276, + "loss": 0.9225090742111206, + "loss_ce": 0.00014580338029190898, + "loss_iou": 0.423828125, + "loss_num": 0.0147705078125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 388559860, + "step": 3076 + }, + { + "epoch": 0.789328544859873, + "grad_norm": 55.650543212890625, + "learning_rate": 5e-06, + "loss": 1.0466, + "num_input_tokens_seen": 388686488, + "step": 3077 + }, + { + "epoch": 0.789328544859873, + "loss": 1.1113238334655762, + "loss_ce": 0.0014606040203943849, + "loss_iou": 0.494140625, + "loss_num": 0.024169921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 388686488, + "step": 3077 + }, + { + "epoch": 0.7895850702238184, + "grad_norm": 38.75040054321289, + "learning_rate": 5e-06, + "loss": 0.9456, + "num_input_tokens_seen": 388813484, + "step": 3078 + }, + { + "epoch": 0.7895850702238184, + "loss": 1.078554391860962, + "loss_ce": 0.0004293875826988369, + "loss_iou": 0.47265625, + "loss_num": 0.026611328125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 388813484, + "step": 3078 + }, + { + "epoch": 0.7898415955877638, + "grad_norm": 50.038455963134766, + "learning_rate": 5e-06, + "loss": 0.8794, + "num_input_tokens_seen": 388939600, + "step": 3079 + }, + { + "epoch": 0.7898415955877638, + "loss": 0.9207192063331604, + "loss_ce": 0.0005532123032025993, + "loss_iou": 0.41015625, + "loss_num": 0.0203857421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 388939600, + "step": 3079 + }, + { + "epoch": 0.7900981209517091, + "grad_norm": 37.45415115356445, + "learning_rate": 5e-06, + "loss": 0.9496, + "num_input_tokens_seen": 389065308, + "step": 3080 + }, + { + "epoch": 0.7900981209517091, + "loss": 1.005045771598816, + "loss_ce": 0.0006511914543807507, + "loss_iou": 0.462890625, + "loss_num": 0.015625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 389065308, + "step": 3080 + }, + { + "epoch": 0.7903546463156544, + "grad_norm": 20.321819305419922, + "learning_rate": 5e-06, + "loss": 0.9958, + "num_input_tokens_seen": 389190980, + "step": 3081 + }, + { + "epoch": 0.7903546463156544, + "loss": 1.104810118675232, + "loss_ce": 0.00129443418700248, + "loss_iou": 0.51171875, + "loss_num": 0.015869140625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 389190980, + "step": 3081 + }, + { + "epoch": 0.7906111716795998, + "grad_norm": 26.987560272216797, + "learning_rate": 5e-06, + "loss": 0.9259, + "num_input_tokens_seen": 389316912, + "step": 3082 + }, + { + "epoch": 0.7906111716795998, + "loss": 0.9806532859802246, + "loss_ce": 0.0011610669316723943, + "loss_iou": 0.46875, + "loss_num": 0.0089111328125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 389316912, + "step": 3082 + }, + { + "epoch": 0.7908676970435452, + "grad_norm": 31.571420669555664, + "learning_rate": 5e-06, + "loss": 0.8396, + "num_input_tokens_seen": 389443040, + "step": 3083 + }, + { + "epoch": 0.7908676970435452, + "loss": 0.7252503037452698, + "loss_ce": 0.0001526724372524768, + "loss_iou": 0.34375, + "loss_num": 0.0079345703125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 389443040, + "step": 3083 + }, + { + "epoch": 0.7911242224074906, + "grad_norm": 29.58075714111328, + "learning_rate": 5e-06, + "loss": 0.9714, + "num_input_tokens_seen": 389570332, + "step": 3084 + }, + { + "epoch": 0.7911242224074906, + "loss": 0.7235045433044434, + "loss_ce": 0.00035997500526718795, + "loss_iou": 0.337890625, + "loss_num": 0.009765625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 389570332, + "step": 3084 + }, + { + "epoch": 0.7913807477714359, + "grad_norm": 22.08127212524414, + "learning_rate": 5e-06, + "loss": 0.9881, + "num_input_tokens_seen": 389696608, + "step": 3085 + }, + { + "epoch": 0.7913807477714359, + "loss": 1.1187113523483276, + "loss_ce": 0.0015238930936902761, + "loss_iou": 0.494140625, + "loss_num": 0.0255126953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 389696608, + "step": 3085 + }, + { + "epoch": 0.7916372731353812, + "grad_norm": 19.641843795776367, + "learning_rate": 5e-06, + "loss": 0.8665, + "num_input_tokens_seen": 389823304, + "step": 3086 + }, + { + "epoch": 0.7916372731353812, + "loss": 0.8002245426177979, + "loss_ce": 0.00041983346454799175, + "loss_iou": 0.3828125, + "loss_num": 0.006927490234375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 389823304, + "step": 3086 + }, + { + "epoch": 0.7918937984993266, + "grad_norm": 47.14255905151367, + "learning_rate": 5e-06, + "loss": 0.8626, + "num_input_tokens_seen": 389950836, + "step": 3087 + }, + { + "epoch": 0.7918937984993266, + "loss": 1.2280123233795166, + "loss_ce": 0.002426381688565016, + "loss_iou": 0.55078125, + "loss_num": 0.02490234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 389950836, + "step": 3087 + }, + { + "epoch": 0.792150323863272, + "grad_norm": 61.10469436645508, + "learning_rate": 5e-06, + "loss": 0.9253, + "num_input_tokens_seen": 390077604, + "step": 3088 + }, + { + "epoch": 0.792150323863272, + "loss": 1.0511298179626465, + "loss_ce": 0.0032782277557998896, + "loss_iou": 0.48046875, + "loss_num": 0.01708984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 390077604, + "step": 3088 + }, + { + "epoch": 0.7924068492272174, + "grad_norm": 49.937538146972656, + "learning_rate": 5e-06, + "loss": 0.8659, + "num_input_tokens_seen": 390205340, + "step": 3089 + }, + { + "epoch": 0.7924068492272174, + "loss": 0.8371323347091675, + "loss_ce": 0.00021825528529006988, + "loss_iou": 0.39453125, + "loss_num": 0.00958251953125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 390205340, + "step": 3089 + }, + { + "epoch": 0.7926633745911627, + "grad_norm": 44.953121185302734, + "learning_rate": 5e-06, + "loss": 0.8822, + "num_input_tokens_seen": 390331272, + "step": 3090 + }, + { + "epoch": 0.7926633745911627, + "loss": 0.8408341407775879, + "loss_ce": 0.0014786667888984084, + "loss_iou": 0.384765625, + "loss_num": 0.0137939453125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 390331272, + "step": 3090 + }, + { + "epoch": 0.792919899955108, + "grad_norm": 45.703529357910156, + "learning_rate": 5e-06, + "loss": 1.1057, + "num_input_tokens_seen": 390455964, + "step": 3091 + }, + { + "epoch": 0.792919899955108, + "loss": 1.3112016916275024, + "loss_ce": 0.0006548682576976717, + "loss_iou": 0.58984375, + "loss_num": 0.0262451171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 390455964, + "step": 3091 + }, + { + "epoch": 0.7931764253190534, + "grad_norm": 34.5233039855957, + "learning_rate": 5e-06, + "loss": 0.9791, + "num_input_tokens_seen": 390582688, + "step": 3092 + }, + { + "epoch": 0.7931764253190534, + "loss": 1.1034932136535645, + "loss_ce": 0.0004658452235162258, + "loss_iou": 0.5, + "loss_num": 0.0208740234375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 390582688, + "step": 3092 + }, + { + "epoch": 0.7934329506829988, + "grad_norm": 35.85054016113281, + "learning_rate": 5e-06, + "loss": 0.9568, + "num_input_tokens_seen": 390709032, + "step": 3093 + }, + { + "epoch": 0.7934329506829988, + "loss": 0.9763548374176025, + "loss_ce": 0.0017454602057114244, + "loss_iou": 0.451171875, + "loss_num": 0.01458740234375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 390709032, + "step": 3093 + }, + { + "epoch": 0.7936894760469442, + "grad_norm": 45.00306701660156, + "learning_rate": 5e-06, + "loss": 0.8363, + "num_input_tokens_seen": 390834400, + "step": 3094 + }, + { + "epoch": 0.7936894760469442, + "loss": 0.7976964712142944, + "loss_ce": 8.904878632165492e-05, + "loss_iou": 0.3828125, + "loss_num": 0.006439208984375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 390834400, + "step": 3094 + }, + { + "epoch": 0.7939460014108896, + "grad_norm": 37.954097747802734, + "learning_rate": 5e-06, + "loss": 0.9845, + "num_input_tokens_seen": 390960484, + "step": 3095 + }, + { + "epoch": 0.7939460014108896, + "loss": 0.8590784668922424, + "loss_ce": 0.003121423302218318, + "loss_iou": 0.39453125, + "loss_num": 0.01373291015625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 390960484, + "step": 3095 + }, + { + "epoch": 0.7942025267748348, + "grad_norm": 46.48175048828125, + "learning_rate": 5e-06, + "loss": 0.8321, + "num_input_tokens_seen": 391087304, + "step": 3096 + }, + { + "epoch": 0.7942025267748348, + "loss": 0.8609225749969482, + "loss_ce": 0.001059305272065103, + "loss_iou": 0.40234375, + "loss_num": 0.01068115234375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 391087304, + "step": 3096 + }, + { + "epoch": 0.7944590521387802, + "grad_norm": 49.74597930908203, + "learning_rate": 5e-06, + "loss": 0.9653, + "num_input_tokens_seen": 391213192, + "step": 3097 + }, + { + "epoch": 0.7944590521387802, + "loss": 0.8918407559394836, + "loss_ce": 0.001703995163552463, + "loss_iou": 0.40234375, + "loss_num": 0.016845703125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 391213192, + "step": 3097 + }, + { + "epoch": 0.7947155775027256, + "grad_norm": 84.275146484375, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 391339468, + "step": 3098 + }, + { + "epoch": 0.7947155775027256, + "loss": 1.1038521528244019, + "loss_ce": 0.0027779145166277885, + "loss_iou": 0.51171875, + "loss_num": 0.0159912109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 391339468, + "step": 3098 + }, + { + "epoch": 0.794972102866671, + "grad_norm": 49.505489349365234, + "learning_rate": 5e-06, + "loss": 1.0382, + "num_input_tokens_seen": 391466212, + "step": 3099 + }, + { + "epoch": 0.794972102866671, + "loss": 1.1364291906356812, + "loss_ce": 0.00019877107115462422, + "loss_iou": 0.51171875, + "loss_num": 0.0225830078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 391466212, + "step": 3099 + }, + { + "epoch": 0.7952286282306164, + "grad_norm": 40.72956466674805, + "learning_rate": 5e-06, + "loss": 0.9539, + "num_input_tokens_seen": 391591472, + "step": 3100 + }, + { + "epoch": 0.7952286282306164, + "loss": 0.9128215312957764, + "loss_ce": 0.0055949389934539795, + "loss_iou": 0.421875, + "loss_num": 0.01239013671875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 391591472, + "step": 3100 + }, + { + "epoch": 0.7954851535945616, + "grad_norm": 50.402278900146484, + "learning_rate": 5e-06, + "loss": 0.9337, + "num_input_tokens_seen": 391718036, + "step": 3101 + }, + { + "epoch": 0.7954851535945616, + "loss": 0.9913931488990784, + "loss_ce": 0.0004263713490217924, + "loss_iou": 0.45703125, + "loss_num": 0.014892578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 391718036, + "step": 3101 + }, + { + "epoch": 0.795741678958507, + "grad_norm": 67.98584747314453, + "learning_rate": 5e-06, + "loss": 0.9695, + "num_input_tokens_seen": 391845188, + "step": 3102 + }, + { + "epoch": 0.795741678958507, + "loss": 0.971010684967041, + "loss_ce": 0.0027489603962749243, + "loss_iou": 0.4609375, + "loss_num": 0.009521484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 391845188, + "step": 3102 + }, + { + "epoch": 0.7959982043224524, + "grad_norm": 68.68293762207031, + "learning_rate": 5e-06, + "loss": 0.8934, + "num_input_tokens_seen": 391971272, + "step": 3103 + }, + { + "epoch": 0.7959982043224524, + "loss": 0.8458807468414307, + "loss_ce": 0.00652527529746294, + "loss_iou": 0.40234375, + "loss_num": 0.0068359375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 391971272, + "step": 3103 + }, + { + "epoch": 0.7962547296863978, + "grad_norm": 39.30879211425781, + "learning_rate": 5e-06, + "loss": 0.8551, + "num_input_tokens_seen": 392097096, + "step": 3104 + }, + { + "epoch": 0.7962547296863978, + "loss": 0.7589737176895142, + "loss_ce": 0.0006729763117618859, + "loss_iou": 0.36328125, + "loss_num": 0.006622314453125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 392097096, + "step": 3104 + }, + { + "epoch": 0.7965112550503431, + "grad_norm": 40.417484283447266, + "learning_rate": 5e-06, + "loss": 1.0567, + "num_input_tokens_seen": 392224252, + "step": 3105 + }, + { + "epoch": 0.7965112550503431, + "loss": 1.0038267374038696, + "loss_ce": 0.0008971041534096003, + "loss_iou": 0.462890625, + "loss_num": 0.015625, + "loss_xval": 1.0, + "num_input_tokens_seen": 392224252, + "step": 3105 + }, + { + "epoch": 0.7967677804142884, + "grad_norm": 52.2308235168457, + "learning_rate": 5e-06, + "loss": 0.9827, + "num_input_tokens_seen": 392349728, + "step": 3106 + }, + { + "epoch": 0.7967677804142884, + "loss": 0.7861615419387817, + "loss_ce": 0.0017377049662172794, + "loss_iou": 0.37109375, + "loss_num": 0.0087890625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 392349728, + "step": 3106 + }, + { + "epoch": 0.7970243057782338, + "grad_norm": 43.75012969970703, + "learning_rate": 5e-06, + "loss": 0.9801, + "num_input_tokens_seen": 392474724, + "step": 3107 + }, + { + "epoch": 0.7970243057782338, + "loss": 1.2422311305999756, + "loss_ce": 0.0029732901602983475, + "loss_iou": 0.5625, + "loss_num": 0.022705078125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 392474724, + "step": 3107 + }, + { + "epoch": 0.7972808311421792, + "grad_norm": 33.877655029296875, + "learning_rate": 5e-06, + "loss": 0.9545, + "num_input_tokens_seen": 392601368, + "step": 3108 + }, + { + "epoch": 0.7972808311421792, + "loss": 0.9168969392776489, + "loss_ce": 0.00039299181662499905, + "loss_iou": 0.431640625, + "loss_num": 0.01104736328125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 392601368, + "step": 3108 + }, + { + "epoch": 0.7975373565061246, + "grad_norm": 21.786861419677734, + "learning_rate": 5e-06, + "loss": 0.8185, + "num_input_tokens_seen": 392727452, + "step": 3109 + }, + { + "epoch": 0.7975373565061246, + "loss": 0.8314062356948853, + "loss_ce": 0.0003515729622449726, + "loss_iou": 0.392578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 392727452, + "step": 3109 + }, + { + "epoch": 0.7977938818700699, + "grad_norm": 34.653987884521484, + "learning_rate": 5e-06, + "loss": 0.8347, + "num_input_tokens_seen": 392853612, + "step": 3110 + }, + { + "epoch": 0.7977938818700699, + "loss": 0.8902150392532349, + "loss_ce": 0.0005665870849043131, + "loss_iou": 0.40625, + "loss_num": 0.015380859375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 392853612, + "step": 3110 + }, + { + "epoch": 0.7980504072340152, + "grad_norm": 37.5821418762207, + "learning_rate": 5e-06, + "loss": 0.9794, + "num_input_tokens_seen": 392979808, + "step": 3111 + }, + { + "epoch": 0.7980504072340152, + "loss": 1.1269629001617432, + "loss_ce": 0.0034277555532753468, + "loss_iou": 0.5, + "loss_num": 0.0244140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 392979808, + "step": 3111 + }, + { + "epoch": 0.7983069325979606, + "grad_norm": 56.82450866699219, + "learning_rate": 5e-06, + "loss": 0.9147, + "num_input_tokens_seen": 393106504, + "step": 3112 + }, + { + "epoch": 0.7983069325979606, + "loss": 0.8906527161598206, + "loss_ce": 0.0007601350080221891, + "loss_iou": 0.412109375, + "loss_num": 0.01300048828125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 393106504, + "step": 3112 + }, + { + "epoch": 0.798563457961906, + "grad_norm": 49.6131706237793, + "learning_rate": 5e-06, + "loss": 1.0482, + "num_input_tokens_seen": 393232564, + "step": 3113 + }, + { + "epoch": 0.798563457961906, + "loss": 1.0119107961654663, + "loss_ce": 0.00019203465490136296, + "loss_iou": 0.47265625, + "loss_num": 0.01373291015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 393232564, + "step": 3113 + }, + { + "epoch": 0.7988199833258514, + "grad_norm": 33.8688850402832, + "learning_rate": 5e-06, + "loss": 0.8939, + "num_input_tokens_seen": 393359368, + "step": 3114 + }, + { + "epoch": 0.7988199833258514, + "loss": 1.0526267290115356, + "loss_ce": 0.00038062920793890953, + "loss_iou": 0.474609375, + "loss_num": 0.0203857421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 393359368, + "step": 3114 + }, + { + "epoch": 0.7990765086897967, + "grad_norm": 47.047576904296875, + "learning_rate": 5e-06, + "loss": 0.927, + "num_input_tokens_seen": 393486312, + "step": 3115 + }, + { + "epoch": 0.7990765086897967, + "loss": 0.8975297212600708, + "loss_ce": 0.0005570473149418831, + "loss_iou": 0.416015625, + "loss_num": 0.01287841796875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 393486312, + "step": 3115 + }, + { + "epoch": 0.799333034053742, + "grad_norm": 43.14470291137695, + "learning_rate": 5e-06, + "loss": 1.0042, + "num_input_tokens_seen": 393612316, + "step": 3116 + }, + { + "epoch": 0.799333034053742, + "loss": 1.037779450416565, + "loss_ce": 0.0011583586456254125, + "loss_iou": 0.484375, + "loss_num": 0.01300048828125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 393612316, + "step": 3116 + }, + { + "epoch": 0.7995895594176874, + "grad_norm": 47.63432312011719, + "learning_rate": 5e-06, + "loss": 0.9377, + "num_input_tokens_seen": 393738564, + "step": 3117 + }, + { + "epoch": 0.7995895594176874, + "loss": 0.9726184606552124, + "loss_ce": 0.0009387761820107698, + "loss_iou": 0.453125, + "loss_num": 0.01318359375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 393738564, + "step": 3117 + }, + { + "epoch": 0.7998460847816328, + "grad_norm": 50.42628479003906, + "learning_rate": 5e-06, + "loss": 0.9502, + "num_input_tokens_seen": 393864052, + "step": 3118 + }, + { + "epoch": 0.7998460847816328, + "loss": 0.9378687143325806, + "loss_ce": 0.0015894039534032345, + "loss_iou": 0.41796875, + "loss_num": 0.0198974609375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 393864052, + "step": 3118 + }, + { + "epoch": 0.8001026101455782, + "grad_norm": 60.873382568359375, + "learning_rate": 5e-06, + "loss": 0.9443, + "num_input_tokens_seen": 393989444, + "step": 3119 + }, + { + "epoch": 0.8001026101455782, + "loss": 1.2508931159973145, + "loss_ce": 0.0023579103872179985, + "loss_iou": 0.57421875, + "loss_num": 0.020751953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 393989444, + "step": 3119 + }, + { + "epoch": 0.8003591355095235, + "grad_norm": 50.71394348144531, + "learning_rate": 5e-06, + "loss": 0.9171, + "num_input_tokens_seen": 394115116, + "step": 3120 + }, + { + "epoch": 0.8003591355095235, + "loss": 1.1381449699401855, + "loss_ce": 0.0019144968828186393, + "loss_iou": 0.5, + "loss_num": 0.027099609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 394115116, + "step": 3120 + }, + { + "epoch": 0.8006156608734689, + "grad_norm": 43.38496017456055, + "learning_rate": 5e-06, + "loss": 0.8378, + "num_input_tokens_seen": 394239804, + "step": 3121 + }, + { + "epoch": 0.8006156608734689, + "loss": 0.7447963953018188, + "loss_ce": 0.0016323348972946405, + "loss_iou": 0.353515625, + "loss_num": 0.007110595703125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 394239804, + "step": 3121 + }, + { + "epoch": 0.8008721862374142, + "grad_norm": 48.97633743286133, + "learning_rate": 5e-06, + "loss": 0.8758, + "num_input_tokens_seen": 394366860, + "step": 3122 + }, + { + "epoch": 0.8008721862374142, + "loss": 0.9008373022079468, + "loss_ce": 0.0023998278193175793, + "loss_iou": 0.4140625, + "loss_num": 0.013916015625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 394366860, + "step": 3122 + }, + { + "epoch": 0.8011287116013596, + "grad_norm": 48.60335159301758, + "learning_rate": 5e-06, + "loss": 1.0576, + "num_input_tokens_seen": 394492244, + "step": 3123 + }, + { + "epoch": 0.8011287116013596, + "loss": 1.323155403137207, + "loss_ce": 0.0004016145830973983, + "loss_iou": 0.59375, + "loss_num": 0.0279541015625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 394492244, + "step": 3123 + }, + { + "epoch": 0.801385236965305, + "grad_norm": 33.23029708862305, + "learning_rate": 5e-06, + "loss": 0.8828, + "num_input_tokens_seen": 394618296, + "step": 3124 + }, + { + "epoch": 0.801385236965305, + "loss": 0.8089559078216553, + "loss_ce": 0.0008504430879838765, + "loss_iou": 0.38671875, + "loss_num": 0.00677490234375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 394618296, + "step": 3124 + }, + { + "epoch": 0.8016417623292503, + "grad_norm": 41.514434814453125, + "learning_rate": 5e-06, + "loss": 0.9045, + "num_input_tokens_seen": 394744560, + "step": 3125 + }, + { + "epoch": 0.8016417623292503, + "loss": 0.7629473805427551, + "loss_ce": 0.00025207901489920914, + "loss_iou": 0.357421875, + "loss_num": 0.00909423828125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 394744560, + "step": 3125 + }, + { + "epoch": 0.8018982876931957, + "grad_norm": 64.35153198242188, + "learning_rate": 5e-06, + "loss": 0.9244, + "num_input_tokens_seen": 394871456, + "step": 3126 + }, + { + "epoch": 0.8018982876931957, + "loss": 0.842146635055542, + "loss_ce": 0.002791227074339986, + "loss_iou": 0.384765625, + "loss_num": 0.013671875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 394871456, + "step": 3126 + }, + { + "epoch": 0.802154813057141, + "grad_norm": 43.053836822509766, + "learning_rate": 5e-06, + "loss": 1.0103, + "num_input_tokens_seen": 394997160, + "step": 3127 + }, + { + "epoch": 0.802154813057141, + "loss": 1.09250807762146, + "loss_ce": 0.0026643122546374798, + "loss_iou": 0.5, + "loss_num": 0.0181884765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 394997160, + "step": 3127 + }, + { + "epoch": 0.8024113384210864, + "grad_norm": 34.35725402832031, + "learning_rate": 5e-06, + "loss": 0.9433, + "num_input_tokens_seen": 395122264, + "step": 3128 + }, + { + "epoch": 0.8024113384210864, + "loss": 0.9319309592247009, + "loss_ce": 0.0002903682179749012, + "loss_iou": 0.439453125, + "loss_num": 0.010986328125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 395122264, + "step": 3128 + }, + { + "epoch": 0.8026678637850317, + "grad_norm": 42.17582321166992, + "learning_rate": 5e-06, + "loss": 0.9783, + "num_input_tokens_seen": 395249636, + "step": 3129 + }, + { + "epoch": 0.8026678637850317, + "loss": 1.0318026542663574, + "loss_ce": 0.0005525524611584842, + "loss_iou": 0.46875, + "loss_num": 0.0191650390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 395249636, + "step": 3129 + }, + { + "epoch": 0.8029243891489771, + "grad_norm": 40.160709381103516, + "learning_rate": 5e-06, + "loss": 0.9903, + "num_input_tokens_seen": 395375336, + "step": 3130 + }, + { + "epoch": 0.8029243891489771, + "loss": 0.8760836720466614, + "loss_ce": 0.00010716063843574375, + "loss_iou": 0.3984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.875, + "num_input_tokens_seen": 395375336, + "step": 3130 + }, + { + "epoch": 0.8031809145129225, + "grad_norm": 54.88969802856445, + "learning_rate": 5e-06, + "loss": 0.9162, + "num_input_tokens_seen": 395501448, + "step": 3131 + }, + { + "epoch": 0.8031809145129225, + "loss": 0.904416024684906, + "loss_ce": 0.000607426802162081, + "loss_iou": 0.421875, + "loss_num": 0.012451171875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 395501448, + "step": 3131 + }, + { + "epoch": 0.8034374398768678, + "grad_norm": 45.18496322631836, + "learning_rate": 5e-06, + "loss": 0.9465, + "num_input_tokens_seen": 395627156, + "step": 3132 + }, + { + "epoch": 0.8034374398768678, + "loss": 0.9607712030410767, + "loss_ce": 0.0008102619904093444, + "loss_iou": 0.435546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 395627156, + "step": 3132 + }, + { + "epoch": 0.8036939652408132, + "grad_norm": 35.832584381103516, + "learning_rate": 5e-06, + "loss": 0.9346, + "num_input_tokens_seen": 395753728, + "step": 3133 + }, + { + "epoch": 0.8036939652408132, + "loss": 0.9488492012023926, + "loss_ce": 0.00011872945469804108, + "loss_iou": 0.427734375, + "loss_num": 0.018798828125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 395753728, + "step": 3133 + }, + { + "epoch": 0.8039504906047585, + "grad_norm": 41.0698127746582, + "learning_rate": 5e-06, + "loss": 1.0287, + "num_input_tokens_seen": 395879752, + "step": 3134 + }, + { + "epoch": 0.8039504906047585, + "loss": 1.143174171447754, + "loss_ce": 0.0059671117924153805, + "loss_iou": 0.4921875, + "loss_num": 0.03076171875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 395879752, + "step": 3134 + }, + { + "epoch": 0.8042070159687039, + "grad_norm": 28.751188278198242, + "learning_rate": 5e-06, + "loss": 0.9802, + "num_input_tokens_seen": 396005892, + "step": 3135 + }, + { + "epoch": 0.8042070159687039, + "loss": 0.9827746152877808, + "loss_ce": 0.0018175948644056916, + "loss_iou": 0.4453125, + "loss_num": 0.017822265625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 396005892, + "step": 3135 + }, + { + "epoch": 0.8044635413326493, + "grad_norm": 27.045249938964844, + "learning_rate": 5e-06, + "loss": 0.8905, + "num_input_tokens_seen": 396132568, + "step": 3136 + }, + { + "epoch": 0.8044635413326493, + "loss": 0.9259840250015259, + "loss_ce": 0.000202813112991862, + "loss_iou": 0.43359375, + "loss_num": 0.01165771484375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 396132568, + "step": 3136 + }, + { + "epoch": 0.8047200666965946, + "grad_norm": 64.1505126953125, + "learning_rate": 5e-06, + "loss": 0.8853, + "num_input_tokens_seen": 396258604, + "step": 3137 + }, + { + "epoch": 0.8047200666965946, + "loss": 0.9470627307891846, + "loss_ce": 0.00028538814513012767, + "loss_iou": 0.447265625, + "loss_num": 0.010498046875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 396258604, + "step": 3137 + }, + { + "epoch": 0.80497659206054, + "grad_norm": 48.33416748046875, + "learning_rate": 5e-06, + "loss": 1.1007, + "num_input_tokens_seen": 396383996, + "step": 3138 + }, + { + "epoch": 0.80497659206054, + "loss": 1.1261286735534668, + "loss_ce": 0.0016169106820598245, + "loss_iou": 0.5, + "loss_num": 0.025146484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 396383996, + "step": 3138 + }, + { + "epoch": 0.8052331174244853, + "grad_norm": 33.24272537231445, + "learning_rate": 5e-06, + "loss": 0.8993, + "num_input_tokens_seen": 396509420, + "step": 3139 + }, + { + "epoch": 0.8052331174244853, + "loss": 0.7244715094566345, + "loss_ce": 0.014022331684827805, + "loss_iou": 0.33203125, + "loss_num": 0.00933837890625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 396509420, + "step": 3139 + }, + { + "epoch": 0.8054896427884307, + "grad_norm": 36.223541259765625, + "learning_rate": 5e-06, + "loss": 0.9825, + "num_input_tokens_seen": 396636616, + "step": 3140 + }, + { + "epoch": 0.8054896427884307, + "loss": 1.038661241531372, + "loss_ce": 0.002528465585783124, + "loss_iou": 0.46875, + "loss_num": 0.01953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 396636616, + "step": 3140 + }, + { + "epoch": 0.8057461681523761, + "grad_norm": 66.0035171508789, + "learning_rate": 5e-06, + "loss": 1.0278, + "num_input_tokens_seen": 396764064, + "step": 3141 + }, + { + "epoch": 0.8057461681523761, + "loss": 1.2313523292541504, + "loss_ce": 0.00186012196354568, + "loss_iou": 0.53125, + "loss_num": 0.03271484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 396764064, + "step": 3141 + }, + { + "epoch": 0.8060026935163215, + "grad_norm": 51.98939895629883, + "learning_rate": 5e-06, + "loss": 0.9746, + "num_input_tokens_seen": 396891000, + "step": 3142 + }, + { + "epoch": 0.8060026935163215, + "loss": 0.9852721095085144, + "loss_ce": 0.0004087829147465527, + "loss_iou": 0.453125, + "loss_num": 0.01544189453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 396891000, + "step": 3142 + }, + { + "epoch": 0.8062592188802667, + "grad_norm": 49.060150146484375, + "learning_rate": 5e-06, + "loss": 0.8522, + "num_input_tokens_seen": 397017472, + "step": 3143 + }, + { + "epoch": 0.8062592188802667, + "loss": 0.8712638020515442, + "loss_ce": 0.0004141835088375956, + "loss_iou": 0.40625, + "loss_num": 0.0111083984375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 397017472, + "step": 3143 + }, + { + "epoch": 0.8065157442442121, + "grad_norm": 44.162437438964844, + "learning_rate": 5e-06, + "loss": 1.0092, + "num_input_tokens_seen": 397144052, + "step": 3144 + }, + { + "epoch": 0.8065157442442121, + "loss": 1.086037516593933, + "loss_ce": 0.0010765960905700922, + "loss_iou": 0.4921875, + "loss_num": 0.0201416015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 397144052, + "step": 3144 + }, + { + "epoch": 0.8067722696081575, + "grad_norm": 32.407081604003906, + "learning_rate": 5e-06, + "loss": 0.9282, + "num_input_tokens_seen": 397269468, + "step": 3145 + }, + { + "epoch": 0.8067722696081575, + "loss": 1.0499733686447144, + "loss_ce": 0.0016334701795130968, + "loss_iou": 0.48828125, + "loss_num": 0.01483154296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 397269468, + "step": 3145 + }, + { + "epoch": 0.8070287949721029, + "grad_norm": 33.57683563232422, + "learning_rate": 5e-06, + "loss": 0.8876, + "num_input_tokens_seen": 397396536, + "step": 3146 + }, + { + "epoch": 0.8070287949721029, + "loss": 0.8332091569900513, + "loss_ce": 0.00044546902063302696, + "loss_iou": 0.3984375, + "loss_num": 0.006744384765625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 397396536, + "step": 3146 + }, + { + "epoch": 0.8072853203360483, + "grad_norm": 29.294931411743164, + "learning_rate": 5e-06, + "loss": 0.9961, + "num_input_tokens_seen": 397521928, + "step": 3147 + }, + { + "epoch": 0.8072853203360483, + "loss": 0.9782864451408386, + "loss_ce": 0.0002590929507277906, + "loss_iou": 0.453125, + "loss_num": 0.0140380859375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 397521928, + "step": 3147 + }, + { + "epoch": 0.8075418456999935, + "grad_norm": 23.802471160888672, + "learning_rate": 5e-06, + "loss": 0.8407, + "num_input_tokens_seen": 397648072, + "step": 3148 + }, + { + "epoch": 0.8075418456999935, + "loss": 0.804663896560669, + "loss_ce": 0.00046469911467283964, + "loss_iou": 0.3828125, + "loss_num": 0.007598876953125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 397648072, + "step": 3148 + }, + { + "epoch": 0.8077983710639389, + "grad_norm": 31.42759895324707, + "learning_rate": 5e-06, + "loss": 0.8763, + "num_input_tokens_seen": 397773888, + "step": 3149 + }, + { + "epoch": 0.8077983710639389, + "loss": 0.8567143678665161, + "loss_ce": 0.001489719608798623, + "loss_iou": 0.388671875, + "loss_num": 0.015869140625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 397773888, + "step": 3149 + }, + { + "epoch": 0.8080548964278843, + "grad_norm": 57.75412368774414, + "learning_rate": 5e-06, + "loss": 0.8429, + "num_input_tokens_seen": 397899440, + "step": 3150 + }, + { + "epoch": 0.8080548964278843, + "loss": 0.7447916269302368, + "loss_ce": 0.003092440776526928, + "loss_iou": 0.34375, + "loss_num": 0.0106201171875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 397899440, + "step": 3150 + }, + { + "epoch": 0.8083114217918297, + "grad_norm": 47.10200500488281, + "learning_rate": 5e-06, + "loss": 0.9557, + "num_input_tokens_seen": 398024964, + "step": 3151 + }, + { + "epoch": 0.8083114217918297, + "loss": 0.8187114596366882, + "loss_ce": 0.0018169176764786243, + "loss_iou": 0.375, + "loss_num": 0.013427734375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 398024964, + "step": 3151 + }, + { + "epoch": 0.8085679471557751, + "grad_norm": 35.810646057128906, + "learning_rate": 5e-06, + "loss": 0.8332, + "num_input_tokens_seen": 398151848, + "step": 3152 + }, + { + "epoch": 0.8085679471557751, + "loss": 0.7030031681060791, + "loss_ce": 0.00134296587202698, + "loss_iou": 0.3359375, + "loss_num": 0.006103515625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 398151848, + "step": 3152 + }, + { + "epoch": 0.8088244725197203, + "grad_norm": 45.44815444946289, + "learning_rate": 5e-06, + "loss": 1.0157, + "num_input_tokens_seen": 398278352, + "step": 3153 + }, + { + "epoch": 0.8088244725197203, + "loss": 0.9875250458717346, + "loss_ce": 0.00022031800472177565, + "loss_iou": 0.44921875, + "loss_num": 0.017578125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 398278352, + "step": 3153 + }, + { + "epoch": 0.8090809978836657, + "grad_norm": 53.01268005371094, + "learning_rate": 5e-06, + "loss": 0.9729, + "num_input_tokens_seen": 398405184, + "step": 3154 + }, + { + "epoch": 0.8090809978836657, + "loss": 0.9092525839805603, + "loss_ce": 0.0005611785454675555, + "loss_iou": 0.41015625, + "loss_num": 0.017822265625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 398405184, + "step": 3154 + }, + { + "epoch": 0.8093375232476111, + "grad_norm": 55.66101837158203, + "learning_rate": 5e-06, + "loss": 0.9021, + "num_input_tokens_seen": 398532132, + "step": 3155 + }, + { + "epoch": 0.8093375232476111, + "loss": 0.8232783079147339, + "loss_ce": 0.0024775569327175617, + "loss_iou": 0.384765625, + "loss_num": 0.0107421875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 398532132, + "step": 3155 + }, + { + "epoch": 0.8095940486115565, + "grad_norm": 66.66975402832031, + "learning_rate": 5e-06, + "loss": 0.8899, + "num_input_tokens_seen": 398659532, + "step": 3156 + }, + { + "epoch": 0.8095940486115565, + "loss": 0.8999677300453186, + "loss_ce": 0.0005536452517844737, + "loss_iou": 0.419921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 398659532, + "step": 3156 + }, + { + "epoch": 0.8098505739755019, + "grad_norm": 117.39408111572266, + "learning_rate": 5e-06, + "loss": 0.9921, + "num_input_tokens_seen": 398785336, + "step": 3157 + }, + { + "epoch": 0.8098505739755019, + "loss": 0.922140896320343, + "loss_ce": 0.00026588235050439835, + "loss_iou": 0.4375, + "loss_num": 0.0093994140625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 398785336, + "step": 3157 + }, + { + "epoch": 0.8101070993394471, + "grad_norm": 52.40734100341797, + "learning_rate": 5e-06, + "loss": 0.8438, + "num_input_tokens_seen": 398912656, + "step": 3158 + }, + { + "epoch": 0.8101070993394471, + "loss": 0.8181699514389038, + "loss_ce": 0.001275440095923841, + "loss_iou": 0.38671875, + "loss_num": 0.00897216796875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 398912656, + "step": 3158 + }, + { + "epoch": 0.8103636247033925, + "grad_norm": 36.86540222167969, + "learning_rate": 5e-06, + "loss": 0.8983, + "num_input_tokens_seen": 399039152, + "step": 3159 + }, + { + "epoch": 0.8103636247033925, + "loss": 0.8122996091842651, + "loss_ce": 0.0002878435770981014, + "loss_iou": 0.392578125, + "loss_num": 0.00579833984375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 399039152, + "step": 3159 + }, + { + "epoch": 0.8106201500673379, + "grad_norm": 53.35686492919922, + "learning_rate": 5e-06, + "loss": 0.9526, + "num_input_tokens_seen": 399165648, + "step": 3160 + }, + { + "epoch": 0.8106201500673379, + "loss": 1.077970266342163, + "loss_ce": 0.0003335924702696502, + "loss_iou": 0.49609375, + "loss_num": 0.016845703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 399165648, + "step": 3160 + }, + { + "epoch": 0.8108766754312833, + "grad_norm": 42.312557220458984, + "learning_rate": 5e-06, + "loss": 0.9249, + "num_input_tokens_seen": 399291756, + "step": 3161 + }, + { + "epoch": 0.8108766754312833, + "loss": 1.0911002159118652, + "loss_ce": 0.005162663757801056, + "loss_iou": 0.474609375, + "loss_num": 0.02734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 399291756, + "step": 3161 + }, + { + "epoch": 0.8111332007952287, + "grad_norm": 41.70225143432617, + "learning_rate": 5e-06, + "loss": 0.9386, + "num_input_tokens_seen": 399416972, + "step": 3162 + }, + { + "epoch": 0.8111332007952287, + "loss": 1.177584171295166, + "loss_ce": 0.003756015794351697, + "loss_iou": 0.53125, + "loss_num": 0.0223388671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 399416972, + "step": 3162 + }, + { + "epoch": 0.811389726159174, + "grad_norm": 43.042259216308594, + "learning_rate": 5e-06, + "loss": 0.9237, + "num_input_tokens_seen": 399544000, + "step": 3163 + }, + { + "epoch": 0.811389726159174, + "loss": 0.9579127430915833, + "loss_ce": 0.00429946556687355, + "loss_iou": 0.4375, + "loss_num": 0.01611328125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 399544000, + "step": 3163 + }, + { + "epoch": 0.8116462515231193, + "grad_norm": 62.8327751159668, + "learning_rate": 5e-06, + "loss": 0.8871, + "num_input_tokens_seen": 399670520, + "step": 3164 + }, + { + "epoch": 0.8116462515231193, + "loss": 0.9716030359268188, + "loss_ce": 0.0006558262393809855, + "loss_iou": 0.447265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 399670520, + "step": 3164 + }, + { + "epoch": 0.8119027768870647, + "grad_norm": 52.89523696899414, + "learning_rate": 5e-06, + "loss": 1.0049, + "num_input_tokens_seen": 399797440, + "step": 3165 + }, + { + "epoch": 0.8119027768870647, + "loss": 0.9875327348709106, + "loss_ce": 0.0051108356565237045, + "loss_iou": 0.4453125, + "loss_num": 0.01806640625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 399797440, + "step": 3165 + }, + { + "epoch": 0.8121593022510101, + "grad_norm": 44.42190933227539, + "learning_rate": 5e-06, + "loss": 0.8777, + "num_input_tokens_seen": 399925308, + "step": 3166 + }, + { + "epoch": 0.8121593022510101, + "loss": 0.819495677947998, + "loss_ce": 0.0026011669542640448, + "loss_iou": 0.380859375, + "loss_num": 0.0111083984375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 399925308, + "step": 3166 + }, + { + "epoch": 0.8124158276149555, + "grad_norm": 48.83903503417969, + "learning_rate": 5e-06, + "loss": 1.0113, + "num_input_tokens_seen": 400051380, + "step": 3167 + }, + { + "epoch": 0.8124158276149555, + "loss": 1.2051180601119995, + "loss_ce": 0.0010164931882172823, + "loss_iou": 0.546875, + "loss_num": 0.0228271484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 400051380, + "step": 3167 + }, + { + "epoch": 0.8126723529789008, + "grad_norm": 36.837615966796875, + "learning_rate": 5e-06, + "loss": 0.801, + "num_input_tokens_seen": 400177088, + "step": 3168 + }, + { + "epoch": 0.8126723529789008, + "loss": 0.8255153894424438, + "loss_ce": 7.590333552798256e-05, + "loss_iou": 0.388671875, + "loss_num": 0.009521484375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 400177088, + "step": 3168 + }, + { + "epoch": 0.8129288783428461, + "grad_norm": 22.556442260742188, + "learning_rate": 5e-06, + "loss": 0.8226, + "num_input_tokens_seen": 400302784, + "step": 3169 + }, + { + "epoch": 0.8129288783428461, + "loss": 0.781917929649353, + "loss_ce": 0.001156178186647594, + "loss_iou": 0.36328125, + "loss_num": 0.0108642578125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 400302784, + "step": 3169 + }, + { + "epoch": 0.8131854037067915, + "grad_norm": 23.420791625976562, + "learning_rate": 5e-06, + "loss": 1.0049, + "num_input_tokens_seen": 400429252, + "step": 3170 + }, + { + "epoch": 0.8131854037067915, + "loss": 1.069065809249878, + "loss_ce": 0.0016830196836963296, + "loss_iou": 0.490234375, + "loss_num": 0.0169677734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 400429252, + "step": 3170 + }, + { + "epoch": 0.8134419290707369, + "grad_norm": 53.57369613647461, + "learning_rate": 5e-06, + "loss": 0.874, + "num_input_tokens_seen": 400555228, + "step": 3171 + }, + { + "epoch": 0.8134419290707369, + "loss": 0.8449221849441528, + "loss_ce": 0.0014163292944431305, + "loss_iou": 0.390625, + "loss_num": 0.01287841796875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 400555228, + "step": 3171 + }, + { + "epoch": 0.8136984544346823, + "grad_norm": 50.760311126708984, + "learning_rate": 5e-06, + "loss": 0.8678, + "num_input_tokens_seen": 400681712, + "step": 3172 + }, + { + "epoch": 0.8136984544346823, + "loss": 1.0057241916656494, + "loss_ce": 0.000841474044136703, + "loss_iou": 0.474609375, + "loss_num": 0.01171875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 400681712, + "step": 3172 + }, + { + "epoch": 0.8139549797986276, + "grad_norm": 46.803897857666016, + "learning_rate": 5e-06, + "loss": 0.9955, + "num_input_tokens_seen": 400807436, + "step": 3173 + }, + { + "epoch": 0.8139549797986276, + "loss": 1.2205946445465088, + "loss_ce": 0.0013563185930252075, + "loss_iou": 0.5546875, + "loss_num": 0.0224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 400807436, + "step": 3173 + }, + { + "epoch": 0.8142115051625729, + "grad_norm": 34.614871978759766, + "learning_rate": 5e-06, + "loss": 0.9042, + "num_input_tokens_seen": 400933092, + "step": 3174 + }, + { + "epoch": 0.8142115051625729, + "loss": 0.852138876914978, + "loss_ce": 0.0010646735318005085, + "loss_iou": 0.39453125, + "loss_num": 0.01220703125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 400933092, + "step": 3174 + }, + { + "epoch": 0.8144680305265183, + "grad_norm": 51.043357849121094, + "learning_rate": 5e-06, + "loss": 0.8636, + "num_input_tokens_seen": 401059572, + "step": 3175 + }, + { + "epoch": 0.8144680305265183, + "loss": 0.7405379414558411, + "loss_ce": 0.00030356721254065633, + "loss_iou": 0.353515625, + "loss_num": 0.006805419921875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 401059572, + "step": 3175 + }, + { + "epoch": 0.8147245558904637, + "grad_norm": 64.50495910644531, + "learning_rate": 5e-06, + "loss": 0.9762, + "num_input_tokens_seen": 401186116, + "step": 3176 + }, + { + "epoch": 0.8147245558904637, + "loss": 1.0540298223495483, + "loss_ce": 0.00031891546677798033, + "loss_iou": 0.4765625, + "loss_num": 0.02001953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 401186116, + "step": 3176 + }, + { + "epoch": 0.814981081254409, + "grad_norm": 51.36925506591797, + "learning_rate": 5e-06, + "loss": 0.9022, + "num_input_tokens_seen": 401312508, + "step": 3177 + }, + { + "epoch": 0.814981081254409, + "loss": 0.8924277424812317, + "loss_ce": 0.00033793720649555326, + "loss_iou": 0.4140625, + "loss_num": 0.0133056640625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 401312508, + "step": 3177 + }, + { + "epoch": 0.8152376066183544, + "grad_norm": 41.818965911865234, + "learning_rate": 5e-06, + "loss": 0.8607, + "num_input_tokens_seen": 401437580, + "step": 3178 + }, + { + "epoch": 0.8152376066183544, + "loss": 0.766558051109314, + "loss_ce": 0.0009330391185358167, + "loss_iou": 0.3671875, + "loss_num": 0.006103515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 401437580, + "step": 3178 + }, + { + "epoch": 0.8154941319822997, + "grad_norm": 42.16203308105469, + "learning_rate": 5e-06, + "loss": 1.0129, + "num_input_tokens_seen": 401563600, + "step": 3179 + }, + { + "epoch": 0.8154941319822997, + "loss": 1.0135772228240967, + "loss_ce": 0.0013702032156288624, + "loss_iou": 0.466796875, + "loss_num": 0.015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 401563600, + "step": 3179 + }, + { + "epoch": 0.8157506573462451, + "grad_norm": 49.449100494384766, + "learning_rate": 5e-06, + "loss": 0.9013, + "num_input_tokens_seen": 401688944, + "step": 3180 + }, + { + "epoch": 0.8157506573462451, + "loss": 1.0203344821929932, + "loss_ce": 0.00226815277710557, + "loss_iou": 0.4609375, + "loss_num": 0.0194091796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 401688944, + "step": 3180 + }, + { + "epoch": 0.8160071827101905, + "grad_norm": 46.67910385131836, + "learning_rate": 5e-06, + "loss": 0.9374, + "num_input_tokens_seen": 401815324, + "step": 3181 + }, + { + "epoch": 0.8160071827101905, + "loss": 0.9345064759254456, + "loss_ce": 0.0018892379011958838, + "loss_iou": 0.4296875, + "loss_num": 0.01458740234375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 401815324, + "step": 3181 + }, + { + "epoch": 0.8162637080741358, + "grad_norm": 60.749210357666016, + "learning_rate": 5e-06, + "loss": 0.9833, + "num_input_tokens_seen": 401942112, + "step": 3182 + }, + { + "epoch": 0.8162637080741358, + "loss": 1.0427157878875732, + "loss_ce": 0.00023534795036539435, + "loss_iou": 0.466796875, + "loss_num": 0.021728515625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 401942112, + "step": 3182 + }, + { + "epoch": 0.8165202334380812, + "grad_norm": 49.386295318603516, + "learning_rate": 5e-06, + "loss": 0.9689, + "num_input_tokens_seen": 402068544, + "step": 3183 + }, + { + "epoch": 0.8165202334380812, + "loss": 0.9832860827445984, + "loss_ce": 0.001840757904574275, + "loss_iou": 0.44921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 402068544, + "step": 3183 + }, + { + "epoch": 0.8167767588020266, + "grad_norm": 27.673599243164062, + "learning_rate": 5e-06, + "loss": 0.9615, + "num_input_tokens_seen": 402194480, + "step": 3184 + }, + { + "epoch": 0.8167767588020266, + "loss": 0.9664819240570068, + "loss_ce": 0.0011499252868816257, + "loss_iou": 0.43359375, + "loss_num": 0.019287109375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 402194480, + "step": 3184 + }, + { + "epoch": 0.8170332841659719, + "grad_norm": 23.838529586791992, + "learning_rate": 5e-06, + "loss": 0.878, + "num_input_tokens_seen": 402321220, + "step": 3185 + }, + { + "epoch": 0.8170332841659719, + "loss": 1.0017471313476562, + "loss_ce": 0.002723683835938573, + "loss_iou": 0.443359375, + "loss_num": 0.0225830078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 402321220, + "step": 3185 + }, + { + "epoch": 0.8172898095299173, + "grad_norm": 29.564373016357422, + "learning_rate": 5e-06, + "loss": 0.9108, + "num_input_tokens_seen": 402447132, + "step": 3186 + }, + { + "epoch": 0.8172898095299173, + "loss": 0.6269969344139099, + "loss_ce": 0.0002879344392567873, + "loss_iou": 0.296875, + "loss_num": 0.006591796875, + "loss_xval": 0.625, + "num_input_tokens_seen": 402447132, + "step": 3186 + }, + { + "epoch": 0.8175463348938626, + "grad_norm": 34.37434005737305, + "learning_rate": 5e-06, + "loss": 0.9106, + "num_input_tokens_seen": 402572156, + "step": 3187 + }, + { + "epoch": 0.8175463348938626, + "loss": 0.9285587072372437, + "loss_ce": 0.0003360353293828666, + "loss_iou": 0.44140625, + "loss_num": 0.009521484375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 402572156, + "step": 3187 + }, + { + "epoch": 0.817802860257808, + "grad_norm": 28.39679527282715, + "learning_rate": 5e-06, + "loss": 0.9188, + "num_input_tokens_seen": 402698244, + "step": 3188 + }, + { + "epoch": 0.817802860257808, + "loss": 0.8634181022644043, + "loss_ce": 0.00013684862642548978, + "loss_iou": 0.3984375, + "loss_num": 0.0133056640625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 402698244, + "step": 3188 + }, + { + "epoch": 0.8180593856217534, + "grad_norm": 45.80647277832031, + "learning_rate": 5e-06, + "loss": 0.8263, + "num_input_tokens_seen": 402824720, + "step": 3189 + }, + { + "epoch": 0.8180593856217534, + "loss": 0.5697246789932251, + "loss_ce": 0.0003887395723722875, + "loss_iou": 0.275390625, + "loss_num": 0.0037384033203125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 402824720, + "step": 3189 + }, + { + "epoch": 0.8183159109856987, + "grad_norm": 61.63531494140625, + "learning_rate": 5e-06, + "loss": 1.0177, + "num_input_tokens_seen": 402950984, + "step": 3190 + }, + { + "epoch": 0.8183159109856987, + "loss": 1.0950753688812256, + "loss_ce": 0.0015693942550569773, + "loss_iou": 0.4921875, + "loss_num": 0.0220947265625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 402950984, + "step": 3190 + }, + { + "epoch": 0.818572436349644, + "grad_norm": 53.73925018310547, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 403077780, + "step": 3191 + }, + { + "epoch": 0.818572436349644, + "loss": 1.021195888519287, + "loss_ce": 0.0001997796935029328, + "loss_iou": 0.46875, + "loss_num": 0.016845703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 403077780, + "step": 3191 + }, + { + "epoch": 0.8188289617135894, + "grad_norm": 45.77851867675781, + "learning_rate": 5e-06, + "loss": 0.9216, + "num_input_tokens_seen": 403203532, + "step": 3192 + }, + { + "epoch": 0.8188289617135894, + "loss": 0.8285750150680542, + "loss_ce": 0.001426597940735519, + "loss_iou": 0.39453125, + "loss_num": 0.007415771484375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 403203532, + "step": 3192 + }, + { + "epoch": 0.8190854870775348, + "grad_norm": 33.52117156982422, + "learning_rate": 5e-06, + "loss": 0.8745, + "num_input_tokens_seen": 403330884, + "step": 3193 + }, + { + "epoch": 0.8190854870775348, + "loss": 1.0243020057678223, + "loss_ce": 0.004526656586676836, + "loss_iou": 0.447265625, + "loss_num": 0.025146484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 403330884, + "step": 3193 + }, + { + "epoch": 0.8193420124414802, + "grad_norm": 44.59539794921875, + "learning_rate": 5e-06, + "loss": 0.9965, + "num_input_tokens_seen": 403457500, + "step": 3194 + }, + { + "epoch": 0.8193420124414802, + "loss": 0.9845786690711975, + "loss_ce": 0.0031333602964878082, + "loss_iou": 0.4453125, + "loss_num": 0.0179443359375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 403457500, + "step": 3194 + }, + { + "epoch": 0.8195985378054255, + "grad_norm": 43.587039947509766, + "learning_rate": 5e-06, + "loss": 0.8838, + "num_input_tokens_seen": 403583480, + "step": 3195 + }, + { + "epoch": 0.8195985378054255, + "loss": 0.918745219707489, + "loss_ce": 0.0012648054398596287, + "loss_iou": 0.421875, + "loss_num": 0.01507568359375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 403583480, + "step": 3195 + }, + { + "epoch": 0.8198550631693708, + "grad_norm": 37.55659103393555, + "learning_rate": 5e-06, + "loss": 0.9324, + "num_input_tokens_seen": 403708988, + "step": 3196 + }, + { + "epoch": 0.8198550631693708, + "loss": 0.7488930821418762, + "loss_ce": 0.00011378983617760241, + "loss_iou": 0.353515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.75, + "num_input_tokens_seen": 403708988, + "step": 3196 + }, + { + "epoch": 0.8201115885333162, + "grad_norm": 46.053890228271484, + "learning_rate": 5e-06, + "loss": 0.8926, + "num_input_tokens_seen": 403835412, + "step": 3197 + }, + { + "epoch": 0.8201115885333162, + "loss": 0.722744345664978, + "loss_ce": 0.003017800860106945, + "loss_iou": 0.337890625, + "loss_num": 0.00860595703125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 403835412, + "step": 3197 + }, + { + "epoch": 0.8203681138972616, + "grad_norm": 57.597755432128906, + "learning_rate": 5e-06, + "loss": 0.9849, + "num_input_tokens_seen": 403962280, + "step": 3198 + }, + { + "epoch": 0.8203681138972616, + "loss": 1.0815812349319458, + "loss_ce": 0.001503093633800745, + "loss_iou": 0.50390625, + "loss_num": 0.01495361328125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 403962280, + "step": 3198 + }, + { + "epoch": 0.820624639261207, + "grad_norm": 40.663143157958984, + "learning_rate": 5e-06, + "loss": 0.9756, + "num_input_tokens_seen": 404088500, + "step": 3199 + }, + { + "epoch": 0.820624639261207, + "loss": 0.9303100109100342, + "loss_ce": 0.0011107935570180416, + "loss_iou": 0.412109375, + "loss_num": 0.021484375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 404088500, + "step": 3199 + }, + { + "epoch": 0.8208811646251523, + "grad_norm": 38.88434600830078, + "learning_rate": 5e-06, + "loss": 0.8722, + "num_input_tokens_seen": 404213836, + "step": 3200 + }, + { + "epoch": 0.8208811646251523, + "loss": 0.9379010200500488, + "loss_ce": 0.0006451201625168324, + "loss_iou": 0.435546875, + "loss_num": 0.01318359375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 404213836, + "step": 3200 + }, + { + "epoch": 0.8211376899890976, + "grad_norm": 33.84011459350586, + "learning_rate": 5e-06, + "loss": 0.8638, + "num_input_tokens_seen": 404338188, + "step": 3201 + }, + { + "epoch": 0.8211376899890976, + "loss": 0.9327942132949829, + "loss_ce": 0.0011535538360476494, + "loss_iou": 0.443359375, + "loss_num": 0.00909423828125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 404338188, + "step": 3201 + }, + { + "epoch": 0.821394215353043, + "grad_norm": 27.785184860229492, + "learning_rate": 5e-06, + "loss": 0.9015, + "num_input_tokens_seen": 404464976, + "step": 3202 + }, + { + "epoch": 0.821394215353043, + "loss": 0.8480465412139893, + "loss_ce": 0.0003902918251696974, + "loss_iou": 0.39453125, + "loss_num": 0.01177978515625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 404464976, + "step": 3202 + }, + { + "epoch": 0.8216507407169884, + "grad_norm": 31.14859962463379, + "learning_rate": 5e-06, + "loss": 0.8308, + "num_input_tokens_seen": 404590552, + "step": 3203 + }, + { + "epoch": 0.8216507407169884, + "loss": 0.9352068305015564, + "loss_ce": 0.00014822129742242396, + "loss_iou": 0.431640625, + "loss_num": 0.01416015625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 404590552, + "step": 3203 + }, + { + "epoch": 0.8219072660809338, + "grad_norm": 22.50996208190918, + "learning_rate": 5e-06, + "loss": 0.9479, + "num_input_tokens_seen": 404716676, + "step": 3204 + }, + { + "epoch": 0.8219072660809338, + "loss": 0.8082893490791321, + "loss_ce": 0.00018384543363936245, + "loss_iou": 0.373046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 404716676, + "step": 3204 + }, + { + "epoch": 0.8221637914448792, + "grad_norm": 38.72842788696289, + "learning_rate": 5e-06, + "loss": 0.824, + "num_input_tokens_seen": 404842608, + "step": 3205 + }, + { + "epoch": 0.8221637914448792, + "loss": 0.8294293880462646, + "loss_ce": 0.00032781471963971853, + "loss_iou": 0.3828125, + "loss_num": 0.0125732421875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 404842608, + "step": 3205 + }, + { + "epoch": 0.8224203168088244, + "grad_norm": 61.372371673583984, + "learning_rate": 5e-06, + "loss": 0.8637, + "num_input_tokens_seen": 404969328, + "step": 3206 + }, + { + "epoch": 0.8224203168088244, + "loss": 0.9179788827896118, + "loss_ce": 0.0007425149087794125, + "loss_iou": 0.423828125, + "loss_num": 0.01416015625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 404969328, + "step": 3206 + }, + { + "epoch": 0.8226768421727698, + "grad_norm": 42.90000915527344, + "learning_rate": 5e-06, + "loss": 0.9174, + "num_input_tokens_seen": 405096188, + "step": 3207 + }, + { + "epoch": 0.8226768421727698, + "loss": 0.9621914625167847, + "loss_ce": 0.0012539359740912914, + "loss_iou": 0.443359375, + "loss_num": 0.01470947265625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 405096188, + "step": 3207 + }, + { + "epoch": 0.8229333675367152, + "grad_norm": 38.445594787597656, + "learning_rate": 5e-06, + "loss": 1.0214, + "num_input_tokens_seen": 405222208, + "step": 3208 + }, + { + "epoch": 0.8229333675367152, + "loss": 0.9868891835212708, + "loss_ce": 0.0005610573571175337, + "loss_iou": 0.4453125, + "loss_num": 0.01953125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 405222208, + "step": 3208 + }, + { + "epoch": 0.8231898929006606, + "grad_norm": 54.31024169921875, + "learning_rate": 5e-06, + "loss": 0.9779, + "num_input_tokens_seen": 405347600, + "step": 3209 + }, + { + "epoch": 0.8231898929006606, + "loss": 1.0210685729980469, + "loss_ce": 0.0008048757445067167, + "loss_iou": 0.4609375, + "loss_num": 0.01953125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 405347600, + "step": 3209 + }, + { + "epoch": 0.823446418264606, + "grad_norm": 43.54954528808594, + "learning_rate": 5e-06, + "loss": 0.9635, + "num_input_tokens_seen": 405473460, + "step": 3210 + }, + { + "epoch": 0.823446418264606, + "loss": 0.95503169298172, + "loss_ce": 0.00019767673802562058, + "loss_iou": 0.439453125, + "loss_num": 0.01544189453125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 405473460, + "step": 3210 + }, + { + "epoch": 0.8237029436285512, + "grad_norm": 44.985145568847656, + "learning_rate": 5e-06, + "loss": 0.9054, + "num_input_tokens_seen": 405599032, + "step": 3211 + }, + { + "epoch": 0.8237029436285512, + "loss": 0.8969042897224426, + "loss_ce": 0.000419898220570758, + "loss_iou": 0.41796875, + "loss_num": 0.0120849609375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 405599032, + "step": 3211 + }, + { + "epoch": 0.8239594689924966, + "grad_norm": 40.911678314208984, + "learning_rate": 5e-06, + "loss": 0.8938, + "num_input_tokens_seen": 405725004, + "step": 3212 + }, + { + "epoch": 0.8239594689924966, + "loss": 0.9338076114654541, + "loss_ce": 0.0011904474813491106, + "loss_iou": 0.4296875, + "loss_num": 0.01434326171875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 405725004, + "step": 3212 + }, + { + "epoch": 0.824215994356442, + "grad_norm": 46.582942962646484, + "learning_rate": 5e-06, + "loss": 0.8709, + "num_input_tokens_seen": 405851500, + "step": 3213 + }, + { + "epoch": 0.824215994356442, + "loss": 1.0055956840515137, + "loss_ce": 0.0021777364891022444, + "loss_iou": 0.44921875, + "loss_num": 0.02099609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 405851500, + "step": 3213 + }, + { + "epoch": 0.8244725197203874, + "grad_norm": 43.71705627441406, + "learning_rate": 5e-06, + "loss": 0.9325, + "num_input_tokens_seen": 405977952, + "step": 3214 + }, + { + "epoch": 0.8244725197203874, + "loss": 1.017009973526001, + "loss_ce": 0.00040835858089849353, + "loss_iou": 0.46484375, + "loss_num": 0.017822265625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 405977952, + "step": 3214 + }, + { + "epoch": 0.8247290450843328, + "grad_norm": 32.80316162109375, + "learning_rate": 5e-06, + "loss": 0.8115, + "num_input_tokens_seen": 406103860, + "step": 3215 + }, + { + "epoch": 0.8247290450843328, + "loss": 0.8729409575462341, + "loss_ce": 0.0011148026678711176, + "loss_iou": 0.40625, + "loss_num": 0.01177978515625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 406103860, + "step": 3215 + }, + { + "epoch": 0.824985570448278, + "grad_norm": 41.50703811645508, + "learning_rate": 5e-06, + "loss": 0.8484, + "num_input_tokens_seen": 406229904, + "step": 3216 + }, + { + "epoch": 0.824985570448278, + "loss": 0.8409217596054077, + "loss_ce": 0.00864636804908514, + "loss_iou": 0.390625, + "loss_num": 0.010498046875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 406229904, + "step": 3216 + }, + { + "epoch": 0.8252420958122234, + "grad_norm": 46.06699752807617, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 406356052, + "step": 3217 + }, + { + "epoch": 0.8252420958122234, + "loss": 1.0339198112487793, + "loss_ce": 0.00022835502750240266, + "loss_iou": 0.4765625, + "loss_num": 0.016357421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 406356052, + "step": 3217 + }, + { + "epoch": 0.8254986211761688, + "grad_norm": 50.41526794433594, + "learning_rate": 5e-06, + "loss": 0.9969, + "num_input_tokens_seen": 406483820, + "step": 3218 + }, + { + "epoch": 0.8254986211761688, + "loss": 1.0014177560806274, + "loss_ce": 0.003370875958353281, + "loss_iou": 0.455078125, + "loss_num": 0.017822265625, + "loss_xval": 1.0, + "num_input_tokens_seen": 406483820, + "step": 3218 + }, + { + "epoch": 0.8257551465401142, + "grad_norm": 57.19381332397461, + "learning_rate": 5e-06, + "loss": 1.006, + "num_input_tokens_seen": 406611200, + "step": 3219 + }, + { + "epoch": 0.8257551465401142, + "loss": 1.000044345855713, + "loss_ce": 0.0005326105747371912, + "loss_iou": 0.44921875, + "loss_num": 0.0205078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 406611200, + "step": 3219 + }, + { + "epoch": 0.8260116719040596, + "grad_norm": 49.69628143310547, + "learning_rate": 5e-06, + "loss": 0.9399, + "num_input_tokens_seen": 406737788, + "step": 3220 + }, + { + "epoch": 0.8260116719040596, + "loss": 0.9905070066452026, + "loss_ce": 0.0002726099919527769, + "loss_iou": 0.447265625, + "loss_num": 0.0189208984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 406737788, + "step": 3220 + }, + { + "epoch": 0.8262681972680048, + "grad_norm": 63.15536117553711, + "learning_rate": 5e-06, + "loss": 0.9407, + "num_input_tokens_seen": 406864780, + "step": 3221 + }, + { + "epoch": 0.8262681972680048, + "loss": 0.8995381593704224, + "loss_ce": 0.0008565601310692728, + "loss_iou": 0.41796875, + "loss_num": 0.0125732421875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 406864780, + "step": 3221 + }, + { + "epoch": 0.8265247226319502, + "grad_norm": 44.9344482421875, + "learning_rate": 5e-06, + "loss": 0.947, + "num_input_tokens_seen": 406992320, + "step": 3222 + }, + { + "epoch": 0.8265247226319502, + "loss": 0.9755024909973145, + "loss_ce": 0.00040485113277100027, + "loss_iou": 0.4375, + "loss_num": 0.0198974609375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 406992320, + "step": 3222 + }, + { + "epoch": 0.8267812479958956, + "grad_norm": 23.89229393005371, + "learning_rate": 5e-06, + "loss": 0.8089, + "num_input_tokens_seen": 407118476, + "step": 3223 + }, + { + "epoch": 0.8267812479958956, + "loss": 0.7007943391799927, + "loss_ce": 0.00023281101312022656, + "loss_iou": 0.328125, + "loss_num": 0.0084228515625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 407118476, + "step": 3223 + }, + { + "epoch": 0.827037773359841, + "grad_norm": 33.91229248046875, + "learning_rate": 5e-06, + "loss": 0.8696, + "num_input_tokens_seen": 407244128, + "step": 3224 + }, + { + "epoch": 0.827037773359841, + "loss": 0.9473456144332886, + "loss_ce": 0.0017889684531837702, + "loss_iou": 0.41015625, + "loss_num": 0.024658203125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 407244128, + "step": 3224 + }, + { + "epoch": 0.8272942987237863, + "grad_norm": 41.23390197753906, + "learning_rate": 5e-06, + "loss": 0.8924, + "num_input_tokens_seen": 407370680, + "step": 3225 + }, + { + "epoch": 0.8272942987237863, + "loss": 0.8628523349761963, + "loss_ce": 0.0010359329171478748, + "loss_iou": 0.404296875, + "loss_num": 0.01080322265625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 407370680, + "step": 3225 + }, + { + "epoch": 0.8275508240877317, + "grad_norm": 37.419532775878906, + "learning_rate": 5e-06, + "loss": 0.8978, + "num_input_tokens_seen": 407495772, + "step": 3226 + }, + { + "epoch": 0.8275508240877317, + "loss": 0.8094415068626404, + "loss_ce": 0.0008477434166707098, + "loss_iou": 0.37109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 407495772, + "step": 3226 + }, + { + "epoch": 0.827807349451677, + "grad_norm": 35.59785079956055, + "learning_rate": 5e-06, + "loss": 0.8984, + "num_input_tokens_seen": 407622200, + "step": 3227 + }, + { + "epoch": 0.827807349451677, + "loss": 1.0361979007720947, + "loss_ce": 0.0005533768562600017, + "loss_iou": 0.455078125, + "loss_num": 0.0250244140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 407622200, + "step": 3227 + }, + { + "epoch": 0.8280638748156224, + "grad_norm": 37.97925567626953, + "learning_rate": 5e-06, + "loss": 0.8899, + "num_input_tokens_seen": 407748132, + "step": 3228 + }, + { + "epoch": 0.8280638748156224, + "loss": 0.8000407218933105, + "loss_ce": 0.0031657565850764513, + "loss_iou": 0.376953125, + "loss_num": 0.00836181640625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 407748132, + "step": 3228 + }, + { + "epoch": 0.8283204001795678, + "grad_norm": 46.49896240234375, + "learning_rate": 5e-06, + "loss": 0.8974, + "num_input_tokens_seen": 407874360, + "step": 3229 + }, + { + "epoch": 0.8283204001795678, + "loss": 0.8780361413955688, + "loss_ce": 0.0010830394458025694, + "loss_iou": 0.39453125, + "loss_num": 0.01708984375, + "loss_xval": 0.875, + "num_input_tokens_seen": 407874360, + "step": 3229 + }, + { + "epoch": 0.8285769255435131, + "grad_norm": 45.23713684082031, + "learning_rate": 5e-06, + "loss": 0.93, + "num_input_tokens_seen": 408000416, + "step": 3230 + }, + { + "epoch": 0.8285769255435131, + "loss": 1.0315477848052979, + "loss_ce": 0.0027392571792006493, + "loss_iou": 0.4609375, + "loss_num": 0.021728515625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 408000416, + "step": 3230 + }, + { + "epoch": 0.8288334509074585, + "grad_norm": 37.725067138671875, + "learning_rate": 5e-06, + "loss": 0.993, + "num_input_tokens_seen": 408128304, + "step": 3231 + }, + { + "epoch": 0.8288334509074585, + "loss": 1.0059795379638672, + "loss_ce": 0.0010966637637466192, + "loss_iou": 0.458984375, + "loss_num": 0.0177001953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 408128304, + "step": 3231 + }, + { + "epoch": 0.8290899762714038, + "grad_norm": 38.406288146972656, + "learning_rate": 5e-06, + "loss": 0.9567, + "num_input_tokens_seen": 408254616, + "step": 3232 + }, + { + "epoch": 0.8290899762714038, + "loss": 0.7051502466201782, + "loss_ce": 0.0005604479811154306, + "loss_iou": 0.337890625, + "loss_num": 0.0054931640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 408254616, + "step": 3232 + }, + { + "epoch": 0.8293465016353492, + "grad_norm": 44.0836067199707, + "learning_rate": 5e-06, + "loss": 0.8199, + "num_input_tokens_seen": 408380176, + "step": 3233 + }, + { + "epoch": 0.8293465016353492, + "loss": 0.8648995161056519, + "loss_ce": 0.0003975875151809305, + "loss_iou": 0.39453125, + "loss_num": 0.01483154296875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 408380176, + "step": 3233 + }, + { + "epoch": 0.8296030269992946, + "grad_norm": 42.561683654785156, + "learning_rate": 5e-06, + "loss": 0.9885, + "num_input_tokens_seen": 408505864, + "step": 3234 + }, + { + "epoch": 0.8296030269992946, + "loss": 0.971314013004303, + "loss_ce": 0.0008550078491680324, + "loss_iou": 0.451171875, + "loss_num": 0.01385498046875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 408505864, + "step": 3234 + }, + { + "epoch": 0.8298595523632399, + "grad_norm": 100.6839599609375, + "learning_rate": 5e-06, + "loss": 0.9402, + "num_input_tokens_seen": 408632032, + "step": 3235 + }, + { + "epoch": 0.8298595523632399, + "loss": 0.8945464491844177, + "loss_ce": 0.001480043400079012, + "loss_iou": 0.41796875, + "loss_num": 0.01141357421875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 408632032, + "step": 3235 + }, + { + "epoch": 0.8301160777271853, + "grad_norm": 36.440345764160156, + "learning_rate": 5e-06, + "loss": 1.0001, + "num_input_tokens_seen": 408757472, + "step": 3236 + }, + { + "epoch": 0.8301160777271853, + "loss": 1.0890145301818848, + "loss_ce": 0.00014735243166796863, + "loss_iou": 0.486328125, + "loss_num": 0.023193359375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 408757472, + "step": 3236 + }, + { + "epoch": 0.8303726030911306, + "grad_norm": 51.320289611816406, + "learning_rate": 5e-06, + "loss": 0.8715, + "num_input_tokens_seen": 408883636, + "step": 3237 + }, + { + "epoch": 0.8303726030911306, + "loss": 0.8932956457138062, + "loss_ce": 0.0007174824131652713, + "loss_iou": 0.431640625, + "loss_num": 0.00567626953125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 408883636, + "step": 3237 + }, + { + "epoch": 0.830629128455076, + "grad_norm": 45.12207794189453, + "learning_rate": 5e-06, + "loss": 0.8901, + "num_input_tokens_seen": 409011016, + "step": 3238 + }, + { + "epoch": 0.830629128455076, + "loss": 0.813601016998291, + "loss_ce": 0.0018334295600652695, + "loss_iou": 0.375, + "loss_num": 0.01214599609375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 409011016, + "step": 3238 + }, + { + "epoch": 0.8308856538190214, + "grad_norm": 41.72220230102539, + "learning_rate": 5e-06, + "loss": 1.0117, + "num_input_tokens_seen": 409137556, + "step": 3239 + }, + { + "epoch": 0.8308856538190214, + "loss": 0.9541773796081543, + "loss_ce": 0.0005641456227749586, + "loss_iou": 0.43359375, + "loss_num": 0.017578125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 409137556, + "step": 3239 + }, + { + "epoch": 0.8311421791829667, + "grad_norm": 45.50634765625, + "learning_rate": 5e-06, + "loss": 0.9451, + "num_input_tokens_seen": 409263968, + "step": 3240 + }, + { + "epoch": 0.8311421791829667, + "loss": 1.0201194286346436, + "loss_ce": 0.0025413173716515303, + "loss_iou": 0.462890625, + "loss_num": 0.0181884765625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 409263968, + "step": 3240 + }, + { + "epoch": 0.8313987045469121, + "grad_norm": 40.572792053222656, + "learning_rate": 5e-06, + "loss": 1.029, + "num_input_tokens_seen": 409389644, + "step": 3241 + }, + { + "epoch": 0.8313987045469121, + "loss": 1.0275163650512695, + "loss_ce": 0.0018816409865394235, + "loss_iou": 0.455078125, + "loss_num": 0.0233154296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 409389644, + "step": 3241 + }, + { + "epoch": 0.8316552299108574, + "grad_norm": 42.139808654785156, + "learning_rate": 5e-06, + "loss": 0.971, + "num_input_tokens_seen": 409516320, + "step": 3242 + }, + { + "epoch": 0.8316552299108574, + "loss": 1.0014221668243408, + "loss_ce": 0.004840051289647818, + "loss_iou": 0.453125, + "loss_num": 0.017822265625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 409516320, + "step": 3242 + }, + { + "epoch": 0.8319117552748028, + "grad_norm": 58.47203063964844, + "learning_rate": 5e-06, + "loss": 0.8449, + "num_input_tokens_seen": 409643464, + "step": 3243 + }, + { + "epoch": 0.8319117552748028, + "loss": 0.7905172109603882, + "loss_ce": 0.002431262284517288, + "loss_iou": 0.361328125, + "loss_num": 0.0133056640625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 409643464, + "step": 3243 + }, + { + "epoch": 0.8321682806387481, + "grad_norm": 50.82135772705078, + "learning_rate": 5e-06, + "loss": 1.0532, + "num_input_tokens_seen": 409769464, + "step": 3244 + }, + { + "epoch": 0.8321682806387481, + "loss": 0.9317537546157837, + "loss_ce": 0.00011310909758321941, + "loss_iou": 0.431640625, + "loss_num": 0.0135498046875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 409769464, + "step": 3244 + }, + { + "epoch": 0.8324248060026935, + "grad_norm": 31.645689010620117, + "learning_rate": 5e-06, + "loss": 0.9772, + "num_input_tokens_seen": 409896824, + "step": 3245 + }, + { + "epoch": 0.8324248060026935, + "loss": 0.9744656682014465, + "loss_ce": 0.000344633765053004, + "loss_iou": 0.451171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 409896824, + "step": 3245 + }, + { + "epoch": 0.8326813313666389, + "grad_norm": 34.07466506958008, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 410022328, + "step": 3246 + }, + { + "epoch": 0.8326813313666389, + "loss": 1.0665581226348877, + "loss_ce": 0.00015182669449131936, + "loss_iou": 0.484375, + "loss_num": 0.01953125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 410022328, + "step": 3246 + }, + { + "epoch": 0.8329378567305843, + "grad_norm": 47.31861114501953, + "learning_rate": 5e-06, + "loss": 0.9616, + "num_input_tokens_seen": 410148932, + "step": 3247 + }, + { + "epoch": 0.8329378567305843, + "loss": 0.9139103889465332, + "loss_ce": 0.0013127480633556843, + "loss_iou": 0.42578125, + "loss_num": 0.01239013671875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 410148932, + "step": 3247 + }, + { + "epoch": 0.8331943820945296, + "grad_norm": 41.639217376708984, + "learning_rate": 5e-06, + "loss": 0.8481, + "num_input_tokens_seen": 410275464, + "step": 3248 + }, + { + "epoch": 0.8331943820945296, + "loss": 0.7717355489730835, + "loss_ce": 0.0002512157952878624, + "loss_iou": 0.375, + "loss_num": 0.004547119140625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 410275464, + "step": 3248 + }, + { + "epoch": 0.8334509074584749, + "grad_norm": 32.76722717285156, + "learning_rate": 5e-06, + "loss": 0.8392, + "num_input_tokens_seen": 410402120, + "step": 3249 + }, + { + "epoch": 0.8334509074584749, + "loss": 0.9330198764801025, + "loss_ce": 0.0023558104876428843, + "loss_iou": 0.421875, + "loss_num": 0.0177001953125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 410402120, + "step": 3249 + }, + { + "epoch": 0.8337074328224203, + "grad_norm": 45.6946907043457, + "learning_rate": 5e-06, + "loss": 0.8654, + "num_input_tokens_seen": 410528264, + "step": 3250 + }, + { + "epoch": 0.8337074328224203, + "eval_icons_CIoU": 0.2547183632850647, + "eval_icons_GIoU": 0.21344279497861862, + "eval_icons_IoU": 0.4413909614086151, + "eval_icons_MAE_all": 0.02615351229906082, + "eval_icons_MAE_h": 0.03489969111979008, + "eval_icons_MAE_w": 0.05405646190047264, + "eval_icons_MAE_x_boxes": 0.05492858774960041, + "eval_icons_MAE_y_boxes": 0.037086451426148415, + "eval_icons_NUM_probability": 0.99985072016716, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 1.678513765335083, + "eval_icons_loss_ce": 6.53693077765638e-05, + "eval_icons_loss_iou": 0.775634765625, + "eval_icons_loss_num": 0.028594970703125, + "eval_icons_loss_xval": 1.69482421875, + "eval_icons_runtime": 51.2112, + "eval_icons_samples_per_second": 0.976, + "eval_icons_steps_per_second": 0.039, + "num_input_tokens_seen": 410528264, + "step": 3250 + }, + { + "epoch": 0.8337074328224203, + "eval_screenspot_CIoU": 0.12010233600934346, + "eval_screenspot_GIoU": 0.10448726018269856, + "eval_screenspot_IoU": 0.2869392881790797, + "eval_screenspot_MAE_all": 0.07776643956700961, + "eval_screenspot_MAE_h": 0.07446849967042606, + "eval_screenspot_MAE_w": 0.12109563251336415, + "eval_screenspot_MAE_x_boxes": 0.09650040666262309, + "eval_screenspot_MAE_y_boxes": 0.05737322320540746, + "eval_screenspot_NUM_probability": 0.9999246994654337, + "eval_screenspot_inside_bbox": 0.6358333428700765, + "eval_screenspot_loss": 2.2128961086273193, + "eval_screenspot_loss_ce": 0.0020763227560867867, + "eval_screenspot_loss_iou": 0.9131673177083334, + "eval_screenspot_loss_num": 0.0822296142578125, + "eval_screenspot_loss_xval": 2.2376302083333335, + "eval_screenspot_runtime": 100.3915, + "eval_screenspot_samples_per_second": 0.887, + "eval_screenspot_steps_per_second": 0.03, + "num_input_tokens_seen": 410528264, + "step": 3250 + }, + { + "epoch": 0.8337074328224203, + "loss": 2.1324052810668945, + "loss_ce": 0.0015457894187420607, + "loss_iou": 0.89453125, + "loss_num": 0.06884765625, + "loss_xval": 2.125, + "num_input_tokens_seen": 410528264, + "step": 3250 + }, + { + "epoch": 0.8339639581863657, + "grad_norm": 71.37127685546875, + "learning_rate": 5e-06, + "loss": 0.9711, + "num_input_tokens_seen": 410655152, + "step": 3251 + }, + { + "epoch": 0.8339639581863657, + "loss": 1.0040916204452515, + "loss_ce": 0.0016502051148563623, + "loss_iou": 0.4609375, + "loss_num": 0.0164794921875, + "loss_xval": 1.0, + "num_input_tokens_seen": 410655152, + "step": 3251 + }, + { + "epoch": 0.8342204835503111, + "grad_norm": 44.42815017700195, + "learning_rate": 5e-06, + "loss": 1.0467, + "num_input_tokens_seen": 410781440, + "step": 3252 + }, + { + "epoch": 0.8342204835503111, + "loss": 0.99126136302948, + "loss_ce": 0.0005387411219999194, + "loss_iou": 0.453125, + "loss_num": 0.01708984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 410781440, + "step": 3252 + }, + { + "epoch": 0.8344770089142564, + "grad_norm": 52.268245697021484, + "learning_rate": 5e-06, + "loss": 0.8704, + "num_input_tokens_seen": 410907832, + "step": 3253 + }, + { + "epoch": 0.8344770089142564, + "loss": 0.9251123070716858, + "loss_ce": 0.0003076334251090884, + "loss_iou": 0.431640625, + "loss_num": 0.0125732421875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 410907832, + "step": 3253 + }, + { + "epoch": 0.8347335342782017, + "grad_norm": 52.71864700317383, + "learning_rate": 5e-06, + "loss": 1.0611, + "num_input_tokens_seen": 411035956, + "step": 3254 + }, + { + "epoch": 0.8347335342782017, + "loss": 0.986844539642334, + "loss_ce": 0.0024696062318980694, + "loss_iou": 0.462890625, + "loss_num": 0.01165771484375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 411035956, + "step": 3254 + }, + { + "epoch": 0.8349900596421471, + "grad_norm": 47.83852005004883, + "learning_rate": 5e-06, + "loss": 0.8981, + "num_input_tokens_seen": 411162016, + "step": 3255 + }, + { + "epoch": 0.8349900596421471, + "loss": 1.0862611532211304, + "loss_ce": 0.0017885229317471385, + "loss_iou": 0.47265625, + "loss_num": 0.0277099609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 411162016, + "step": 3255 + }, + { + "epoch": 0.8352465850060925, + "grad_norm": 63.054710388183594, + "learning_rate": 5e-06, + "loss": 1.0281, + "num_input_tokens_seen": 411289844, + "step": 3256 + }, + { + "epoch": 0.8352465850060925, + "loss": 1.1283671855926514, + "loss_ce": 0.002390654291957617, + "loss_iou": 0.5078125, + "loss_num": 0.022216796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 411289844, + "step": 3256 + }, + { + "epoch": 0.8355031103700379, + "grad_norm": 63.21312713623047, + "learning_rate": 5e-06, + "loss": 0.9566, + "num_input_tokens_seen": 411415456, + "step": 3257 + }, + { + "epoch": 0.8355031103700379, + "loss": 0.9683589935302734, + "loss_ce": 0.0010738681303337216, + "loss_iou": 0.443359375, + "loss_num": 0.015869140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 411415456, + "step": 3257 + }, + { + "epoch": 0.8357596357339832, + "grad_norm": 75.36348724365234, + "learning_rate": 5e-06, + "loss": 0.951, + "num_input_tokens_seen": 411541776, + "step": 3258 + }, + { + "epoch": 0.8357596357339832, + "loss": 0.877450704574585, + "loss_ce": 0.0004975988995283842, + "loss_iou": 0.412109375, + "loss_num": 0.01055908203125, + "loss_xval": 0.875, + "num_input_tokens_seen": 411541776, + "step": 3258 + }, + { + "epoch": 0.8360161610979285, + "grad_norm": 41.369781494140625, + "learning_rate": 5e-06, + "loss": 0.8733, + "num_input_tokens_seen": 411668528, + "step": 3259 + }, + { + "epoch": 0.8360161610979285, + "loss": 0.7836183309555054, + "loss_ce": 0.0013917863834649324, + "loss_iou": 0.361328125, + "loss_num": 0.01190185546875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 411668528, + "step": 3259 + }, + { + "epoch": 0.8362726864618739, + "grad_norm": 46.048614501953125, + "learning_rate": 5e-06, + "loss": 0.9051, + "num_input_tokens_seen": 411794480, + "step": 3260 + }, + { + "epoch": 0.8362726864618739, + "loss": 0.8674889802932739, + "loss_ce": 5.7305765949422494e-05, + "loss_iou": 0.40625, + "loss_num": 0.01080322265625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 411794480, + "step": 3260 + }, + { + "epoch": 0.8365292118258193, + "grad_norm": 46.227386474609375, + "learning_rate": 5e-06, + "loss": 1.0017, + "num_input_tokens_seen": 411920980, + "step": 3261 + }, + { + "epoch": 0.8365292118258193, + "loss": 1.2831918001174927, + "loss_ce": 0.0014534820802509785, + "loss_iou": 0.56640625, + "loss_num": 0.0296630859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 411920980, + "step": 3261 + }, + { + "epoch": 0.8367857371897647, + "grad_norm": 34.17587661743164, + "learning_rate": 5e-06, + "loss": 1.0308, + "num_input_tokens_seen": 412046976, + "step": 3262 + }, + { + "epoch": 0.8367857371897647, + "loss": 1.2230675220489502, + "loss_ce": 0.002364428248256445, + "loss_iou": 0.56640625, + "loss_num": 0.017578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 412046976, + "step": 3262 + }, + { + "epoch": 0.83704226255371, + "grad_norm": 21.054309844970703, + "learning_rate": 5e-06, + "loss": 0.7924, + "num_input_tokens_seen": 412172996, + "step": 3263 + }, + { + "epoch": 0.83704226255371, + "loss": 0.9540098309516907, + "loss_ce": 0.0033262295182794333, + "loss_iou": 0.435546875, + "loss_num": 0.015869140625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 412172996, + "step": 3263 + }, + { + "epoch": 0.8372987879176553, + "grad_norm": 34.457271575927734, + "learning_rate": 5e-06, + "loss": 0.8711, + "num_input_tokens_seen": 412298648, + "step": 3264 + }, + { + "epoch": 0.8372987879176553, + "loss": 1.060942530632019, + "loss_ce": 0.0008839344372972846, + "loss_iou": 0.486328125, + "loss_num": 0.017822265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 412298648, + "step": 3264 + }, + { + "epoch": 0.8375553132816007, + "grad_norm": 41.15773010253906, + "learning_rate": 5e-06, + "loss": 0.8832, + "num_input_tokens_seen": 412424388, + "step": 3265 + }, + { + "epoch": 0.8375553132816007, + "loss": 0.9723127484321594, + "loss_ce": 0.0001447701215511188, + "loss_iou": 0.453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 412424388, + "step": 3265 + }, + { + "epoch": 0.8378118386455461, + "grad_norm": 47.46888732910156, + "learning_rate": 5e-06, + "loss": 0.8643, + "num_input_tokens_seen": 412550524, + "step": 3266 + }, + { + "epoch": 0.8378118386455461, + "loss": 1.0214431285858154, + "loss_ce": 0.0016678018728271127, + "loss_iou": 0.451171875, + "loss_num": 0.0235595703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 412550524, + "step": 3266 + }, + { + "epoch": 0.8380683640094915, + "grad_norm": 54.783687591552734, + "learning_rate": 5e-06, + "loss": 0.8516, + "num_input_tokens_seen": 412677192, + "step": 3267 + }, + { + "epoch": 0.8380683640094915, + "loss": 0.8658664226531982, + "loss_ce": 0.0011203757021576166, + "loss_iou": 0.41015625, + "loss_num": 0.0087890625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 412677192, + "step": 3267 + }, + { + "epoch": 0.8383248893734367, + "grad_norm": 51.795318603515625, + "learning_rate": 5e-06, + "loss": 0.9689, + "num_input_tokens_seen": 412802836, + "step": 3268 + }, + { + "epoch": 0.8383248893734367, + "loss": 1.0761953592300415, + "loss_ce": 0.002464849501848221, + "loss_iou": 0.48046875, + "loss_num": 0.0230712890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 412802836, + "step": 3268 + }, + { + "epoch": 0.8385814147373821, + "grad_norm": 39.276065826416016, + "learning_rate": 5e-06, + "loss": 0.818, + "num_input_tokens_seen": 412928912, + "step": 3269 + }, + { + "epoch": 0.8385814147373821, + "loss": 0.8427125215530396, + "loss_ce": 0.002624644199386239, + "loss_iou": 0.392578125, + "loss_num": 0.01123046875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 412928912, + "step": 3269 + }, + { + "epoch": 0.8388379401013275, + "grad_norm": 47.479312896728516, + "learning_rate": 5e-06, + "loss": 0.8458, + "num_input_tokens_seen": 413055576, + "step": 3270 + }, + { + "epoch": 0.8388379401013275, + "loss": 0.9539889097213745, + "loss_ce": 0.0015962861943989992, + "loss_iou": 0.423828125, + "loss_num": 0.0213623046875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 413055576, + "step": 3270 + }, + { + "epoch": 0.8390944654652729, + "grad_norm": 54.887062072753906, + "learning_rate": 5e-06, + "loss": 0.8547, + "num_input_tokens_seen": 413181136, + "step": 3271 + }, + { + "epoch": 0.8390944654652729, + "loss": 1.012515902519226, + "loss_ce": 0.00030888558831065893, + "loss_iou": 0.47265625, + "loss_num": 0.01397705078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 413181136, + "step": 3271 + }, + { + "epoch": 0.8393509908292183, + "grad_norm": 38.04448699951172, + "learning_rate": 5e-06, + "loss": 0.9412, + "num_input_tokens_seen": 413307348, + "step": 3272 + }, + { + "epoch": 0.8393509908292183, + "loss": 0.7754035592079163, + "loss_ce": 0.00196608598344028, + "loss_iou": 0.361328125, + "loss_num": 0.010498046875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 413307348, + "step": 3272 + }, + { + "epoch": 0.8396075161931636, + "grad_norm": 60.24583053588867, + "learning_rate": 5e-06, + "loss": 0.9614, + "num_input_tokens_seen": 413434492, + "step": 3273 + }, + { + "epoch": 0.8396075161931636, + "loss": 1.033352255821228, + "loss_ce": 0.0030788236763328314, + "loss_iou": 0.4609375, + "loss_num": 0.0216064453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 413434492, + "step": 3273 + }, + { + "epoch": 0.8398640415571089, + "grad_norm": 34.550453186035156, + "learning_rate": 5e-06, + "loss": 0.9347, + "num_input_tokens_seen": 413559940, + "step": 3274 + }, + { + "epoch": 0.8398640415571089, + "loss": 0.9444822669029236, + "loss_ce": 0.00014633704267907888, + "loss_iou": 0.43359375, + "loss_num": 0.0152587890625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 413559940, + "step": 3274 + }, + { + "epoch": 0.8401205669210543, + "grad_norm": 25.34375762939453, + "learning_rate": 5e-06, + "loss": 0.9393, + "num_input_tokens_seen": 413684888, + "step": 3275 + }, + { + "epoch": 0.8401205669210543, + "loss": 0.8565676212310791, + "loss_ce": 0.00036639804602600634, + "loss_iou": 0.400390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 413684888, + "step": 3275 + }, + { + "epoch": 0.8403770922849997, + "grad_norm": 19.35994529724121, + "learning_rate": 5e-06, + "loss": 1.002, + "num_input_tokens_seen": 413811272, + "step": 3276 + }, + { + "epoch": 0.8403770922849997, + "loss": 0.9814521670341492, + "loss_ce": 0.0004951510345563293, + "loss_iou": 0.455078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 413811272, + "step": 3276 + }, + { + "epoch": 0.8406336176489451, + "grad_norm": 30.091197967529297, + "learning_rate": 5e-06, + "loss": 0.9354, + "num_input_tokens_seen": 413937192, + "step": 3277 + }, + { + "epoch": 0.8406336176489451, + "loss": 1.185107707977295, + "loss_ce": 0.0010257081594318151, + "loss_iou": 0.53125, + "loss_num": 0.023681640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 413937192, + "step": 3277 + }, + { + "epoch": 0.8408901430128904, + "grad_norm": 39.94975662231445, + "learning_rate": 5e-06, + "loss": 0.8531, + "num_input_tokens_seen": 414063880, + "step": 3278 + }, + { + "epoch": 0.8408901430128904, + "loss": 0.9416366219520569, + "loss_ce": 0.000718640279956162, + "loss_iou": 0.4375, + "loss_num": 0.01324462890625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 414063880, + "step": 3278 + }, + { + "epoch": 0.8411466683768357, + "grad_norm": 29.290145874023438, + "learning_rate": 5e-06, + "loss": 0.9506, + "num_input_tokens_seen": 414189888, + "step": 3279 + }, + { + "epoch": 0.8411466683768357, + "loss": 0.9394330382347107, + "loss_ce": 0.0024213106371462345, + "loss_iou": 0.42578125, + "loss_num": 0.0166015625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 414189888, + "step": 3279 + }, + { + "epoch": 0.8414031937407811, + "grad_norm": 31.89249610900879, + "learning_rate": 5e-06, + "loss": 0.9291, + "num_input_tokens_seen": 414315788, + "step": 3280 + }, + { + "epoch": 0.8414031937407811, + "loss": 0.7641960978507996, + "loss_ce": 0.0015008015325292945, + "loss_iou": 0.34375, + "loss_num": 0.01470947265625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 414315788, + "step": 3280 + }, + { + "epoch": 0.8416597191047265, + "grad_norm": 46.444087982177734, + "learning_rate": 5e-06, + "loss": 0.9562, + "num_input_tokens_seen": 414442504, + "step": 3281 + }, + { + "epoch": 0.8416597191047265, + "loss": 0.9517788290977478, + "loss_ce": 0.0030483717564493418, + "loss_iou": 0.443359375, + "loss_num": 0.012451171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 414442504, + "step": 3281 + }, + { + "epoch": 0.8419162444686719, + "grad_norm": 54.40727996826172, + "learning_rate": 5e-06, + "loss": 0.9411, + "num_input_tokens_seen": 414568268, + "step": 3282 + }, + { + "epoch": 0.8419162444686719, + "loss": 1.0423855781555176, + "loss_ce": 0.0013699313858523965, + "loss_iou": 0.484375, + "loss_num": 0.0147705078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 414568268, + "step": 3282 + }, + { + "epoch": 0.8421727698326172, + "grad_norm": 45.83207321166992, + "learning_rate": 5e-06, + "loss": 0.9898, + "num_input_tokens_seen": 414693156, + "step": 3283 + }, + { + "epoch": 0.8421727698326172, + "loss": 0.9596890211105347, + "loss_ce": 0.001681216643191874, + "loss_iou": 0.451171875, + "loss_num": 0.01129150390625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 414693156, + "step": 3283 + }, + { + "epoch": 0.8424292951965625, + "grad_norm": 50.873600006103516, + "learning_rate": 5e-06, + "loss": 1.0078, + "num_input_tokens_seen": 414819492, + "step": 3284 + }, + { + "epoch": 0.8424292951965625, + "loss": 0.8282773494720459, + "loss_ce": 0.0011289074318483472, + "loss_iou": 0.388671875, + "loss_num": 0.0098876953125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 414819492, + "step": 3284 + }, + { + "epoch": 0.8426858205605079, + "grad_norm": 51.38712692260742, + "learning_rate": 5e-06, + "loss": 1.0126, + "num_input_tokens_seen": 414945116, + "step": 3285 + }, + { + "epoch": 0.8426858205605079, + "loss": 1.0724636316299438, + "loss_ce": 0.0045924922451376915, + "loss_iou": 0.48046875, + "loss_num": 0.0205078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 414945116, + "step": 3285 + }, + { + "epoch": 0.8429423459244533, + "grad_norm": 24.471580505371094, + "learning_rate": 5e-06, + "loss": 0.9559, + "num_input_tokens_seen": 415071328, + "step": 3286 + }, + { + "epoch": 0.8429423459244533, + "loss": 1.041412591934204, + "loss_ce": 0.0003969701938331127, + "loss_iou": 0.478515625, + "loss_num": 0.0164794921875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 415071328, + "step": 3286 + }, + { + "epoch": 0.8431988712883987, + "grad_norm": 33.78357696533203, + "learning_rate": 5e-06, + "loss": 0.867, + "num_input_tokens_seen": 415197192, + "step": 3287 + }, + { + "epoch": 0.8431988712883987, + "loss": 0.8455868363380432, + "loss_ce": 0.0013485063100233674, + "loss_iou": 0.392578125, + "loss_num": 0.0120849609375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 415197192, + "step": 3287 + }, + { + "epoch": 0.843455396652344, + "grad_norm": 56.37723922729492, + "learning_rate": 5e-06, + "loss": 0.9964, + "num_input_tokens_seen": 415323348, + "step": 3288 + }, + { + "epoch": 0.843455396652344, + "loss": 0.9011285305023193, + "loss_ce": 0.00024963394389487803, + "loss_iou": 0.40234375, + "loss_num": 0.018798828125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 415323348, + "step": 3288 + }, + { + "epoch": 0.8437119220162893, + "grad_norm": 54.682762145996094, + "learning_rate": 5e-06, + "loss": 0.9587, + "num_input_tokens_seen": 415449544, + "step": 3289 + }, + { + "epoch": 0.8437119220162893, + "loss": 1.0842171907424927, + "loss_ce": 0.001209419802762568, + "loss_iou": 0.49609375, + "loss_num": 0.0177001953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 415449544, + "step": 3289 + }, + { + "epoch": 0.8439684473802347, + "grad_norm": 24.957805633544922, + "learning_rate": 5e-06, + "loss": 0.9365, + "num_input_tokens_seen": 415576068, + "step": 3290 + }, + { + "epoch": 0.8439684473802347, + "loss": 0.805275559425354, + "loss_ce": 0.0010763676837086678, + "loss_iou": 0.369140625, + "loss_num": 0.01287841796875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 415576068, + "step": 3290 + }, + { + "epoch": 0.8442249727441801, + "grad_norm": 25.76247215270996, + "learning_rate": 5e-06, + "loss": 0.9071, + "num_input_tokens_seen": 415703608, + "step": 3291 + }, + { + "epoch": 0.8442249727441801, + "loss": 1.3586525917053223, + "loss_ce": 0.0007424566429108381, + "loss_iou": 0.61328125, + "loss_num": 0.026123046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 415703608, + "step": 3291 + }, + { + "epoch": 0.8444814981081255, + "grad_norm": 31.472679138183594, + "learning_rate": 5e-06, + "loss": 0.8919, + "num_input_tokens_seen": 415829348, + "step": 3292 + }, + { + "epoch": 0.8444814981081255, + "loss": 0.892143964767456, + "loss_ce": 0.002495552645996213, + "loss_iou": 0.40625, + "loss_num": 0.01495361328125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 415829348, + "step": 3292 + }, + { + "epoch": 0.8447380234720708, + "grad_norm": 41.38591766357422, + "learning_rate": 5e-06, + "loss": 0.8937, + "num_input_tokens_seen": 415954912, + "step": 3293 + }, + { + "epoch": 0.8447380234720708, + "loss": 0.679589569568634, + "loss_ce": 0.00014623221068177372, + "loss_iou": 0.322265625, + "loss_num": 0.007171630859375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 415954912, + "step": 3293 + }, + { + "epoch": 0.8449945488360162, + "grad_norm": 38.4943962097168, + "learning_rate": 5e-06, + "loss": 0.8725, + "num_input_tokens_seen": 416081152, + "step": 3294 + }, + { + "epoch": 0.8449945488360162, + "loss": 0.811089038848877, + "loss_ce": 0.00029801303753629327, + "loss_iou": 0.373046875, + "loss_num": 0.0128173828125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 416081152, + "step": 3294 + }, + { + "epoch": 0.8452510741999615, + "grad_norm": 32.25592803955078, + "learning_rate": 5e-06, + "loss": 0.8849, + "num_input_tokens_seen": 416206712, + "step": 3295 + }, + { + "epoch": 0.8452510741999615, + "loss": 0.7446740865707397, + "loss_ce": 0.0007775577250868082, + "loss_iou": 0.3359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 416206712, + "step": 3295 + }, + { + "epoch": 0.8455075995639069, + "grad_norm": 32.295989990234375, + "learning_rate": 5e-06, + "loss": 1.0084, + "num_input_tokens_seen": 416333068, + "step": 3296 + }, + { + "epoch": 0.8455075995639069, + "loss": 1.0649378299713135, + "loss_ce": 0.003414318896830082, + "loss_iou": 0.48046875, + "loss_num": 0.019775390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 416333068, + "step": 3296 + }, + { + "epoch": 0.8457641249278522, + "grad_norm": 52.71395492553711, + "learning_rate": 5e-06, + "loss": 1.0534, + "num_input_tokens_seen": 416459752, + "step": 3297 + }, + { + "epoch": 0.8457641249278522, + "loss": 1.0659351348876953, + "loss_ce": 0.00026137533131986856, + "loss_iou": 0.48046875, + "loss_num": 0.0213623046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 416459752, + "step": 3297 + }, + { + "epoch": 0.8460206502917976, + "grad_norm": 43.42734146118164, + "learning_rate": 5e-06, + "loss": 0.8787, + "num_input_tokens_seen": 416586940, + "step": 3298 + }, + { + "epoch": 0.8460206502917976, + "loss": 0.77693772315979, + "loss_ce": 0.008383046835660934, + "loss_iou": 0.361328125, + "loss_num": 0.00958251953125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 416586940, + "step": 3298 + }, + { + "epoch": 0.846277175655743, + "grad_norm": 30.562435150146484, + "learning_rate": 5e-06, + "loss": 0.9414, + "num_input_tokens_seen": 416713780, + "step": 3299 + }, + { + "epoch": 0.846277175655743, + "loss": 0.8217019438743591, + "loss_ce": 0.0011453131446614861, + "loss_iou": 0.3828125, + "loss_num": 0.010986328125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 416713780, + "step": 3299 + }, + { + "epoch": 0.8465337010196883, + "grad_norm": 34.46150207519531, + "learning_rate": 5e-06, + "loss": 0.8957, + "num_input_tokens_seen": 416840240, + "step": 3300 + }, + { + "epoch": 0.8465337010196883, + "loss": 0.8753318190574646, + "loss_ce": 0.0013083890080451965, + "loss_iou": 0.408203125, + "loss_num": 0.0113525390625, + "loss_xval": 0.875, + "num_input_tokens_seen": 416840240, + "step": 3300 + }, + { + "epoch": 0.8467902263836337, + "grad_norm": 39.962318420410156, + "learning_rate": 5e-06, + "loss": 0.9044, + "num_input_tokens_seen": 416966024, + "step": 3301 + }, + { + "epoch": 0.8467902263836337, + "loss": 0.9036628007888794, + "loss_ce": 0.0003424343012738973, + "loss_iou": 0.419921875, + "loss_num": 0.0126953125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 416966024, + "step": 3301 + }, + { + "epoch": 0.847046751747579, + "grad_norm": 40.47099304199219, + "learning_rate": 5e-06, + "loss": 0.8808, + "num_input_tokens_seen": 417092440, + "step": 3302 + }, + { + "epoch": 0.847046751747579, + "loss": 0.835878849029541, + "loss_ce": 0.0011620419099926949, + "loss_iou": 0.380859375, + "loss_num": 0.0147705078125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 417092440, + "step": 3302 + }, + { + "epoch": 0.8473032771115244, + "grad_norm": 42.88495635986328, + "learning_rate": 5e-06, + "loss": 0.9904, + "num_input_tokens_seen": 417217652, + "step": 3303 + }, + { + "epoch": 0.8473032771115244, + "loss": 1.0145187377929688, + "loss_ce": 0.00035862805088981986, + "loss_iou": 0.4609375, + "loss_num": 0.018310546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 417217652, + "step": 3303 + }, + { + "epoch": 0.8475598024754698, + "grad_norm": 42.10586166381836, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 417343552, + "step": 3304 + }, + { + "epoch": 0.8475598024754698, + "loss": 1.1715761423110962, + "loss_ce": 0.0021425692830234766, + "loss_iou": 0.53125, + "loss_num": 0.0208740234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 417343552, + "step": 3304 + }, + { + "epoch": 0.8478163278394151, + "grad_norm": 59.61101150512695, + "learning_rate": 5e-06, + "loss": 1.0267, + "num_input_tokens_seen": 417470136, + "step": 3305 + }, + { + "epoch": 0.8478163278394151, + "loss": 0.8943018317222595, + "loss_ce": 0.0007471424178220332, + "loss_iou": 0.40234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 417470136, + "step": 3305 + }, + { + "epoch": 0.8480728532033605, + "grad_norm": 46.93505859375, + "learning_rate": 5e-06, + "loss": 0.9287, + "num_input_tokens_seen": 417596044, + "step": 3306 + }, + { + "epoch": 0.8480728532033605, + "loss": 1.0100346803665161, + "loss_ce": 0.0002690745168365538, + "loss_iou": 0.4609375, + "loss_num": 0.0177001953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 417596044, + "step": 3306 + }, + { + "epoch": 0.8483293785673058, + "grad_norm": 42.36141586303711, + "learning_rate": 5e-06, + "loss": 0.8381, + "num_input_tokens_seen": 417722768, + "step": 3307 + }, + { + "epoch": 0.8483293785673058, + "loss": 0.7937300205230713, + "loss_ce": 0.0002730304258875549, + "loss_iou": 0.359375, + "loss_num": 0.01495361328125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 417722768, + "step": 3307 + }, + { + "epoch": 0.8485859039312512, + "grad_norm": 40.73113250732422, + "learning_rate": 5e-06, + "loss": 0.9088, + "num_input_tokens_seen": 417848880, + "step": 3308 + }, + { + "epoch": 0.8485859039312512, + "loss": 0.8894962072372437, + "loss_ce": 0.003265778999775648, + "loss_iou": 0.40625, + "loss_num": 0.01470947265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 417848880, + "step": 3308 + }, + { + "epoch": 0.8488424292951966, + "grad_norm": 45.95648956298828, + "learning_rate": 5e-06, + "loss": 1.0072, + "num_input_tokens_seen": 417974892, + "step": 3309 + }, + { + "epoch": 0.8488424292951966, + "loss": 1.0475322008132935, + "loss_ce": 0.0006571850390173495, + "loss_iou": 0.47265625, + "loss_num": 0.0208740234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 417974892, + "step": 3309 + }, + { + "epoch": 0.8490989546591419, + "grad_norm": 46.05951690673828, + "learning_rate": 5e-06, + "loss": 0.944, + "num_input_tokens_seen": 418101116, + "step": 3310 + }, + { + "epoch": 0.8490989546591419, + "loss": 0.8296672105789185, + "loss_ce": 0.002030501840636134, + "loss_iou": 0.3828125, + "loss_num": 0.01202392578125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 418101116, + "step": 3310 + }, + { + "epoch": 0.8493554800230873, + "grad_norm": 46.78510665893555, + "learning_rate": 5e-06, + "loss": 0.8446, + "num_input_tokens_seen": 418228500, + "step": 3311 + }, + { + "epoch": 0.8493554800230873, + "loss": 0.839684784412384, + "loss_ce": 0.0003292882756795734, + "loss_iou": 0.38671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 418228500, + "step": 3311 + }, + { + "epoch": 0.8496120053870326, + "grad_norm": 43.384525299072266, + "learning_rate": 5e-06, + "loss": 0.9264, + "num_input_tokens_seen": 418355236, + "step": 3312 + }, + { + "epoch": 0.8496120053870326, + "loss": 0.9101749062538147, + "loss_ce": 0.0005069556646049023, + "loss_iou": 0.41015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 418355236, + "step": 3312 + }, + { + "epoch": 0.849868530750978, + "grad_norm": 46.69747543334961, + "learning_rate": 5e-06, + "loss": 0.9957, + "num_input_tokens_seen": 418481108, + "step": 3313 + }, + { + "epoch": 0.849868530750978, + "loss": 0.9128376245498657, + "loss_ce": 0.004146212246268988, + "loss_iou": 0.42578125, + "loss_num": 0.01141357421875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 418481108, + "step": 3313 + }, + { + "epoch": 0.8501250561149234, + "grad_norm": 36.590126037597656, + "learning_rate": 5e-06, + "loss": 1.1007, + "num_input_tokens_seen": 418605364, + "step": 3314 + }, + { + "epoch": 0.8501250561149234, + "loss": 1.1354076862335205, + "loss_ce": 0.0006420772988349199, + "loss_iou": 0.5, + "loss_num": 0.0260009765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 418605364, + "step": 3314 + }, + { + "epoch": 0.8503815814788688, + "grad_norm": 14.583720207214355, + "learning_rate": 5e-06, + "loss": 0.9979, + "num_input_tokens_seen": 418730808, + "step": 3315 + }, + { + "epoch": 0.8503815814788688, + "loss": 0.9814618825912476, + "loss_ce": 0.0019696494564414024, + "loss_iou": 0.453125, + "loss_num": 0.01483154296875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 418730808, + "step": 3315 + }, + { + "epoch": 0.850638106842814, + "grad_norm": 39.342960357666016, + "learning_rate": 5e-06, + "loss": 0.9204, + "num_input_tokens_seen": 418857948, + "step": 3316 + }, + { + "epoch": 0.850638106842814, + "loss": 0.9751467704772949, + "loss_ce": 0.0010256250388920307, + "loss_iou": 0.44140625, + "loss_num": 0.017822265625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 418857948, + "step": 3316 + }, + { + "epoch": 0.8508946322067594, + "grad_norm": 56.561038970947266, + "learning_rate": 5e-06, + "loss": 0.9211, + "num_input_tokens_seen": 418984456, + "step": 3317 + }, + { + "epoch": 0.8508946322067594, + "loss": 1.0902132987976074, + "loss_ce": 0.0013460994232445955, + "loss_iou": 0.4921875, + "loss_num": 0.0206298828125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 418984456, + "step": 3317 + }, + { + "epoch": 0.8511511575707048, + "grad_norm": 63.44384765625, + "learning_rate": 5e-06, + "loss": 0.9397, + "num_input_tokens_seen": 419110968, + "step": 3318 + }, + { + "epoch": 0.8511511575707048, + "loss": 0.9654141068458557, + "loss_ce": 0.0010586383286863565, + "loss_iou": 0.419921875, + "loss_num": 0.0250244140625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 419110968, + "step": 3318 + }, + { + "epoch": 0.8514076829346502, + "grad_norm": 51.43317794799805, + "learning_rate": 5e-06, + "loss": 0.9992, + "num_input_tokens_seen": 419237064, + "step": 3319 + }, + { + "epoch": 0.8514076829346502, + "loss": 0.9523866176605225, + "loss_ce": 0.00023817787587177008, + "loss_iou": 0.4375, + "loss_num": 0.015869140625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 419237064, + "step": 3319 + }, + { + "epoch": 0.8516642082985956, + "grad_norm": 46.81265640258789, + "learning_rate": 5e-06, + "loss": 0.9525, + "num_input_tokens_seen": 419363664, + "step": 3320 + }, + { + "epoch": 0.8516642082985956, + "loss": 0.9412074089050293, + "loss_ce": 0.0007776570273563266, + "loss_iou": 0.4296875, + "loss_num": 0.0166015625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 419363664, + "step": 3320 + }, + { + "epoch": 0.8519207336625408, + "grad_norm": 55.83944320678711, + "learning_rate": 5e-06, + "loss": 1.0074, + "num_input_tokens_seen": 419489444, + "step": 3321 + }, + { + "epoch": 0.8519207336625408, + "loss": 1.0929300785064697, + "loss_ce": 0.0030863601714372635, + "loss_iou": 0.49609375, + "loss_num": 0.0196533203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 419489444, + "step": 3321 + }, + { + "epoch": 0.8521772590264862, + "grad_norm": 49.659915924072266, + "learning_rate": 5e-06, + "loss": 1.0026, + "num_input_tokens_seen": 419615152, + "step": 3322 + }, + { + "epoch": 0.8521772590264862, + "loss": 0.9486437439918518, + "loss_ce": 0.0004015436570625752, + "loss_iou": 0.41796875, + "loss_num": 0.022705078125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 419615152, + "step": 3322 + }, + { + "epoch": 0.8524337843904316, + "grad_norm": 40.574012756347656, + "learning_rate": 5e-06, + "loss": 0.8748, + "num_input_tokens_seen": 419741252, + "step": 3323 + }, + { + "epoch": 0.8524337843904316, + "loss": 0.7848821878433228, + "loss_ce": 0.00021424230362754315, + "loss_iou": 0.373046875, + "loss_num": 0.0076904296875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 419741252, + "step": 3323 + }, + { + "epoch": 0.852690309754377, + "grad_norm": 39.735599517822266, + "learning_rate": 5e-06, + "loss": 0.9231, + "num_input_tokens_seen": 419867728, + "step": 3324 + }, + { + "epoch": 0.852690309754377, + "loss": 1.0229344367980957, + "loss_ce": 0.0004733927780762315, + "loss_iou": 0.46875, + "loss_num": 0.0167236328125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 419867728, + "step": 3324 + }, + { + "epoch": 0.8529468351183224, + "grad_norm": 50.71043395996094, + "learning_rate": 5e-06, + "loss": 0.8996, + "num_input_tokens_seen": 419994460, + "step": 3325 + }, + { + "epoch": 0.8529468351183224, + "loss": 0.8749765157699585, + "loss_ce": 0.003394470317289233, + "loss_iou": 0.3984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 419994460, + "step": 3325 + }, + { + "epoch": 0.8532033604822676, + "grad_norm": 68.93893432617188, + "learning_rate": 5e-06, + "loss": 1.0277, + "num_input_tokens_seen": 420120232, + "step": 3326 + }, + { + "epoch": 0.8532033604822676, + "loss": 1.0536737442016602, + "loss_ce": 0.0009393331129103899, + "loss_iou": 0.48828125, + "loss_num": 0.014892578125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 420120232, + "step": 3326 + }, + { + "epoch": 0.853459885846213, + "grad_norm": 50.659461975097656, + "learning_rate": 5e-06, + "loss": 0.9334, + "num_input_tokens_seen": 420247044, + "step": 3327 + }, + { + "epoch": 0.853459885846213, + "loss": 0.9205120801925659, + "loss_ce": 0.006205474026501179, + "loss_iou": 0.423828125, + "loss_num": 0.01287841796875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 420247044, + "step": 3327 + }, + { + "epoch": 0.8537164112101584, + "grad_norm": 37.88582229614258, + "learning_rate": 5e-06, + "loss": 0.9291, + "num_input_tokens_seen": 420373836, + "step": 3328 + }, + { + "epoch": 0.8537164112101584, + "loss": 0.7829341888427734, + "loss_ce": 0.00021940979058854282, + "loss_iou": 0.373046875, + "loss_num": 0.007537841796875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 420373836, + "step": 3328 + }, + { + "epoch": 0.8539729365741038, + "grad_norm": 44.22090148925781, + "learning_rate": 5e-06, + "loss": 0.9369, + "num_input_tokens_seen": 420499232, + "step": 3329 + }, + { + "epoch": 0.8539729365741038, + "loss": 0.9793094396591187, + "loss_ce": 0.0003055102133657783, + "loss_iou": 0.43359375, + "loss_num": 0.0223388671875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 420499232, + "step": 3329 + }, + { + "epoch": 0.8542294619380492, + "grad_norm": 35.67002868652344, + "learning_rate": 5e-06, + "loss": 0.897, + "num_input_tokens_seen": 420625116, + "step": 3330 + }, + { + "epoch": 0.8542294619380492, + "loss": 0.9607816934585571, + "loss_ce": 0.0015531876124441624, + "loss_iou": 0.44140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 420625116, + "step": 3330 + }, + { + "epoch": 0.8544859873019944, + "grad_norm": 43.643001556396484, + "learning_rate": 5e-06, + "loss": 0.9537, + "num_input_tokens_seen": 420752352, + "step": 3331 + }, + { + "epoch": 0.8544859873019944, + "loss": 0.822020947933197, + "loss_ce": 0.0026850299909710884, + "loss_iou": 0.38671875, + "loss_num": 0.0091552734375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 420752352, + "step": 3331 + }, + { + "epoch": 0.8547425126659398, + "grad_norm": 56.62778854370117, + "learning_rate": 5e-06, + "loss": 0.9474, + "num_input_tokens_seen": 420878252, + "step": 3332 + }, + { + "epoch": 0.8547425126659398, + "loss": 1.0628888607025146, + "loss_ce": 0.00038889748975634575, + "loss_iou": 0.49609375, + "loss_num": 0.01422119140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 420878252, + "step": 3332 + }, + { + "epoch": 0.8549990380298852, + "grad_norm": 51.94295883178711, + "learning_rate": 5e-06, + "loss": 0.9271, + "num_input_tokens_seen": 421004372, + "step": 3333 + }, + { + "epoch": 0.8549990380298852, + "loss": 0.8880189657211304, + "loss_ce": 0.0008119416306726635, + "loss_iou": 0.4140625, + "loss_num": 0.011962890625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 421004372, + "step": 3333 + }, + { + "epoch": 0.8552555633938306, + "grad_norm": 43.35361862182617, + "learning_rate": 5e-06, + "loss": 0.9945, + "num_input_tokens_seen": 421129768, + "step": 3334 + }, + { + "epoch": 0.8552555633938306, + "loss": 0.9212596416473389, + "loss_ce": 0.0008495484944432974, + "loss_iou": 0.421875, + "loss_num": 0.01531982421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 421129768, + "step": 3334 + }, + { + "epoch": 0.855512088757776, + "grad_norm": 40.47994613647461, + "learning_rate": 5e-06, + "loss": 0.9076, + "num_input_tokens_seen": 421256156, + "step": 3335 + }, + { + "epoch": 0.855512088757776, + "loss": 0.9002221822738647, + "loss_ce": 0.0008081350242719054, + "loss_iou": 0.423828125, + "loss_num": 0.01025390625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 421256156, + "step": 3335 + }, + { + "epoch": 0.8557686141217213, + "grad_norm": 39.135318756103516, + "learning_rate": 5e-06, + "loss": 1.0198, + "num_input_tokens_seen": 421382736, + "step": 3336 + }, + { + "epoch": 0.8557686141217213, + "loss": 1.1542543172836304, + "loss_ce": 0.00581681914627552, + "loss_iou": 0.53125, + "loss_num": 0.017822265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 421382736, + "step": 3336 + }, + { + "epoch": 0.8560251394856666, + "grad_norm": 61.504112243652344, + "learning_rate": 5e-06, + "loss": 0.9085, + "num_input_tokens_seen": 421508076, + "step": 3337 + }, + { + "epoch": 0.8560251394856666, + "loss": 1.033182144165039, + "loss_ce": 0.0004673894727602601, + "loss_iou": 0.48828125, + "loss_num": 0.0111083984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 421508076, + "step": 3337 + }, + { + "epoch": 0.856281664849612, + "grad_norm": 31.722789764404297, + "learning_rate": 5e-06, + "loss": 0.9298, + "num_input_tokens_seen": 421632960, + "step": 3338 + }, + { + "epoch": 0.856281664849612, + "loss": 0.9868402481079102, + "loss_ce": 0.001488760462962091, + "loss_iou": 0.44140625, + "loss_num": 0.0205078125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 421632960, + "step": 3338 + }, + { + "epoch": 0.8565381902135574, + "grad_norm": 37.03248977661133, + "learning_rate": 5e-06, + "loss": 0.928, + "num_input_tokens_seen": 421759240, + "step": 3339 + }, + { + "epoch": 0.8565381902135574, + "loss": 0.9903021454811096, + "loss_ce": 0.005438865628093481, + "loss_iou": 0.455078125, + "loss_num": 0.01507568359375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 421759240, + "step": 3339 + }, + { + "epoch": 0.8567947155775028, + "grad_norm": 55.56910705566406, + "learning_rate": 5e-06, + "loss": 0.9195, + "num_input_tokens_seen": 421886056, + "step": 3340 + }, + { + "epoch": 0.8567947155775028, + "loss": 1.0564732551574707, + "loss_ce": 0.0012974500423297286, + "loss_iou": 0.494140625, + "loss_num": 0.01312255859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 421886056, + "step": 3340 + }, + { + "epoch": 0.8570512409414481, + "grad_norm": 44.19303512573242, + "learning_rate": 5e-06, + "loss": 1.0791, + "num_input_tokens_seen": 422011544, + "step": 3341 + }, + { + "epoch": 0.8570512409414481, + "loss": 0.9943357706069946, + "loss_ce": 0.00019516682368703187, + "loss_iou": 0.45703125, + "loss_num": 0.015869140625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 422011544, + "step": 3341 + }, + { + "epoch": 0.8573077663053934, + "grad_norm": 30.351472854614258, + "learning_rate": 5e-06, + "loss": 0.9021, + "num_input_tokens_seen": 422138792, + "step": 3342 + }, + { + "epoch": 0.8573077663053934, + "loss": 1.0064613819122314, + "loss_ce": 0.002799329813569784, + "loss_iou": 0.470703125, + "loss_num": 0.01214599609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 422138792, + "step": 3342 + }, + { + "epoch": 0.8575642916693388, + "grad_norm": 36.865413665771484, + "learning_rate": 5e-06, + "loss": 1.0077, + "num_input_tokens_seen": 422263876, + "step": 3343 + }, + { + "epoch": 0.8575642916693388, + "loss": 1.0200577974319458, + "loss_ce": 0.0029679194558411837, + "loss_iou": 0.466796875, + "loss_num": 0.0166015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 422263876, + "step": 3343 + }, + { + "epoch": 0.8578208170332842, + "grad_norm": 45.51285934448242, + "learning_rate": 5e-06, + "loss": 0.9348, + "num_input_tokens_seen": 422391180, + "step": 3344 + }, + { + "epoch": 0.8578208170332842, + "loss": 0.910622239112854, + "loss_ce": 0.0019308581249788404, + "loss_iou": 0.4296875, + "loss_num": 0.009521484375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 422391180, + "step": 3344 + }, + { + "epoch": 0.8580773423972295, + "grad_norm": 50.99921798706055, + "learning_rate": 5e-06, + "loss": 1.0036, + "num_input_tokens_seen": 422517516, + "step": 3345 + }, + { + "epoch": 0.8580773423972295, + "loss": 1.1606777906417847, + "loss_ce": 0.0024746404960751534, + "loss_iou": 0.515625, + "loss_num": 0.0260009765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 422517516, + "step": 3345 + }, + { + "epoch": 0.8583338677611749, + "grad_norm": 60.35527038574219, + "learning_rate": 5e-06, + "loss": 0.9523, + "num_input_tokens_seen": 422644492, + "step": 3346 + }, + { + "epoch": 0.8583338677611749, + "loss": 0.9860947132110596, + "loss_ce": 0.0007431993144564331, + "loss_iou": 0.451171875, + "loss_num": 0.0162353515625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 422644492, + "step": 3346 + }, + { + "epoch": 0.8585903931251202, + "grad_norm": 38.97570037841797, + "learning_rate": 5e-06, + "loss": 0.9825, + "num_input_tokens_seen": 422770144, + "step": 3347 + }, + { + "epoch": 0.8585903931251202, + "loss": 0.9971222877502441, + "loss_ce": 0.0049348329193890095, + "loss_iou": 0.447265625, + "loss_num": 0.0196533203125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 422770144, + "step": 3347 + }, + { + "epoch": 0.8588469184890656, + "grad_norm": 20.011890411376953, + "learning_rate": 5e-06, + "loss": 0.8473, + "num_input_tokens_seen": 422895820, + "step": 3348 + }, + { + "epoch": 0.8588469184890656, + "loss": 0.7862467765808105, + "loss_ce": 0.0006022133165970445, + "loss_iou": 0.373046875, + "loss_num": 0.00799560546875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 422895820, + "step": 3348 + }, + { + "epoch": 0.859103443853011, + "grad_norm": 28.97025489807129, + "learning_rate": 5e-06, + "loss": 0.8848, + "num_input_tokens_seen": 423022080, + "step": 3349 + }, + { + "epoch": 0.859103443853011, + "loss": 0.729302167892456, + "loss_ce": 0.0012747516157105565, + "loss_iou": 0.33203125, + "loss_num": 0.01318359375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 423022080, + "step": 3349 + }, + { + "epoch": 0.8593599692169563, + "grad_norm": 41.44636154174805, + "learning_rate": 5e-06, + "loss": 0.8497, + "num_input_tokens_seen": 423148008, + "step": 3350 + }, + { + "epoch": 0.8593599692169563, + "loss": 0.8323186635971069, + "loss_ce": 0.0007757161511108279, + "loss_iou": 0.380859375, + "loss_num": 0.0140380859375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 423148008, + "step": 3350 + }, + { + "epoch": 0.8596164945809017, + "grad_norm": 47.78594970703125, + "learning_rate": 5e-06, + "loss": 0.9166, + "num_input_tokens_seen": 423273512, + "step": 3351 + }, + { + "epoch": 0.8596164945809017, + "loss": 0.9198451042175293, + "loss_ce": 0.001388048636727035, + "loss_iou": 0.43359375, + "loss_num": 0.010009765625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 423273512, + "step": 3351 + }, + { + "epoch": 0.859873019944847, + "grad_norm": 52.565208435058594, + "learning_rate": 5e-06, + "loss": 0.8765, + "num_input_tokens_seen": 423399820, + "step": 3352 + }, + { + "epoch": 0.859873019944847, + "loss": 0.9637659788131714, + "loss_ce": 0.0008753291331231594, + "loss_iou": 0.44921875, + "loss_num": 0.01287841796875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 423399820, + "step": 3352 + }, + { + "epoch": 0.8601295453087924, + "grad_norm": 39.76402282714844, + "learning_rate": 5e-06, + "loss": 0.9205, + "num_input_tokens_seen": 423525880, + "step": 3353 + }, + { + "epoch": 0.8601295453087924, + "loss": 0.9379321336746216, + "loss_ce": 0.00311764283105731, + "loss_iou": 0.43359375, + "loss_num": 0.01361083984375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 423525880, + "step": 3353 + }, + { + "epoch": 0.8603860706727378, + "grad_norm": 23.899873733520508, + "learning_rate": 5e-06, + "loss": 0.9853, + "num_input_tokens_seen": 423652232, + "step": 3354 + }, + { + "epoch": 0.8603860706727378, + "loss": 0.77067631483078, + "loss_ce": 0.0021216338500380516, + "loss_iou": 0.369140625, + "loss_num": 0.005859375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 423652232, + "step": 3354 + }, + { + "epoch": 0.8606425960366831, + "grad_norm": 30.767906188964844, + "learning_rate": 5e-06, + "loss": 0.8674, + "num_input_tokens_seen": 423778436, + "step": 3355 + }, + { + "epoch": 0.8606425960366831, + "loss": 0.7608532905578613, + "loss_ce": 0.0008435493800789118, + "loss_iou": 0.359375, + "loss_num": 0.00836181640625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 423778436, + "step": 3355 + }, + { + "epoch": 0.8608991214006285, + "grad_norm": 37.987457275390625, + "learning_rate": 5e-06, + "loss": 0.9334, + "num_input_tokens_seen": 423905384, + "step": 3356 + }, + { + "epoch": 0.8608991214006285, + "loss": 0.8292726278305054, + "loss_ce": 0.0006593377329409122, + "loss_iou": 0.388671875, + "loss_num": 0.01055908203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 423905384, + "step": 3356 + }, + { + "epoch": 0.8611556467645739, + "grad_norm": 39.740055084228516, + "learning_rate": 5e-06, + "loss": 0.8333, + "num_input_tokens_seen": 424030460, + "step": 3357 + }, + { + "epoch": 0.8611556467645739, + "loss": 0.8573504686355591, + "loss_ce": 0.0004168879531789571, + "loss_iou": 0.412109375, + "loss_num": 0.00628662109375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 424030460, + "step": 3357 + }, + { + "epoch": 0.8614121721285192, + "grad_norm": 41.32125473022461, + "learning_rate": 5e-06, + "loss": 0.8355, + "num_input_tokens_seen": 424155252, + "step": 3358 + }, + { + "epoch": 0.8614121721285192, + "loss": 0.8626106977462769, + "loss_ce": 0.0005501298583112657, + "loss_iou": 0.412109375, + "loss_num": 0.007354736328125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 424155252, + "step": 3358 + }, + { + "epoch": 0.8616686974924646, + "grad_norm": 43.381717681884766, + "learning_rate": 5e-06, + "loss": 0.933, + "num_input_tokens_seen": 424281124, + "step": 3359 + }, + { + "epoch": 0.8616686974924646, + "loss": 0.7797003984451294, + "loss_ce": 0.0004035631427541375, + "loss_iou": 0.375, + "loss_num": 0.006317138671875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 424281124, + "step": 3359 + }, + { + "epoch": 0.8619252228564099, + "grad_norm": 50.07094192504883, + "learning_rate": 5e-06, + "loss": 0.9937, + "num_input_tokens_seen": 424406748, + "step": 3360 + }, + { + "epoch": 0.8619252228564099, + "loss": 1.1648428440093994, + "loss_ce": 0.0022452790290117264, + "loss_iou": 0.515625, + "loss_num": 0.0267333984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 424406748, + "step": 3360 + }, + { + "epoch": 0.8621817482203553, + "grad_norm": 46.07054901123047, + "learning_rate": 5e-06, + "loss": 0.9666, + "num_input_tokens_seen": 424533692, + "step": 3361 + }, + { + "epoch": 0.8621817482203553, + "loss": 1.134671688079834, + "loss_ce": 0.00478879502043128, + "loss_iou": 0.515625, + "loss_num": 0.0191650390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 424533692, + "step": 3361 + }, + { + "epoch": 0.8624382735843007, + "grad_norm": 40.93661880493164, + "learning_rate": 5e-06, + "loss": 0.8428, + "num_input_tokens_seen": 424660732, + "step": 3362 + }, + { + "epoch": 0.8624382735843007, + "loss": 0.7039631605148315, + "loss_ce": 0.0008381842635571957, + "loss_iou": 0.33203125, + "loss_num": 0.00811767578125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 424660732, + "step": 3362 + }, + { + "epoch": 0.862694798948246, + "grad_norm": 43.45201110839844, + "learning_rate": 5e-06, + "loss": 0.9564, + "num_input_tokens_seen": 424787256, + "step": 3363 + }, + { + "epoch": 0.862694798948246, + "loss": 1.0155932903289795, + "loss_ce": 0.0014332043938338757, + "loss_iou": 0.4375, + "loss_num": 0.02783203125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 424787256, + "step": 3363 + }, + { + "epoch": 0.8629513243121913, + "grad_norm": 46.61076736450195, + "learning_rate": 5e-06, + "loss": 0.9862, + "num_input_tokens_seen": 424912688, + "step": 3364 + }, + { + "epoch": 0.8629513243121913, + "loss": 0.8970376253128052, + "loss_ce": 0.000309094728436321, + "loss_iou": 0.41796875, + "loss_num": 0.01239013671875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 424912688, + "step": 3364 + }, + { + "epoch": 0.8632078496761367, + "grad_norm": 39.44859313964844, + "learning_rate": 5e-06, + "loss": 0.9531, + "num_input_tokens_seen": 425039368, + "step": 3365 + }, + { + "epoch": 0.8632078496761367, + "loss": 0.9733309745788574, + "loss_ce": 0.0009188262629322708, + "loss_iou": 0.447265625, + "loss_num": 0.015869140625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 425039368, + "step": 3365 + }, + { + "epoch": 0.8634643750400821, + "grad_norm": 48.57670593261719, + "learning_rate": 5e-06, + "loss": 0.8386, + "num_input_tokens_seen": 425165132, + "step": 3366 + }, + { + "epoch": 0.8634643750400821, + "loss": 0.9195308685302734, + "loss_ce": 0.001073860446922481, + "loss_iou": 0.439453125, + "loss_num": 0.0076904296875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 425165132, + "step": 3366 + }, + { + "epoch": 0.8637209004040275, + "grad_norm": 48.26242446899414, + "learning_rate": 5e-06, + "loss": 1.008, + "num_input_tokens_seen": 425292056, + "step": 3367 + }, + { + "epoch": 0.8637209004040275, + "loss": 0.8665575385093689, + "loss_ce": 0.002788014942780137, + "loss_iou": 0.408203125, + "loss_num": 0.009521484375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 425292056, + "step": 3367 + }, + { + "epoch": 0.8639774257679728, + "grad_norm": 54.0738525390625, + "learning_rate": 5e-06, + "loss": 0.8853, + "num_input_tokens_seen": 425418972, + "step": 3368 + }, + { + "epoch": 0.8639774257679728, + "loss": 0.9666658639907837, + "loss_ce": 0.007193173747509718, + "loss_iou": 0.427734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 425418972, + "step": 3368 + }, + { + "epoch": 0.8642339511319181, + "grad_norm": 48.69795608520508, + "learning_rate": 5e-06, + "loss": 0.9934, + "num_input_tokens_seen": 425543972, + "step": 3369 + }, + { + "epoch": 0.8642339511319181, + "loss": 0.8921758532524109, + "loss_ce": 0.0015508763026446104, + "loss_iou": 0.40625, + "loss_num": 0.0157470703125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 425543972, + "step": 3369 + }, + { + "epoch": 0.8644904764958635, + "grad_norm": 34.35894775390625, + "learning_rate": 5e-06, + "loss": 0.929, + "num_input_tokens_seen": 425670372, + "step": 3370 + }, + { + "epoch": 0.8644904764958635, + "loss": 0.7190690040588379, + "loss_ce": 0.00324870552867651, + "loss_iou": 0.337890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 425670372, + "step": 3370 + }, + { + "epoch": 0.8647470018598089, + "grad_norm": 44.47837829589844, + "learning_rate": 5e-06, + "loss": 0.9657, + "num_input_tokens_seen": 425797076, + "step": 3371 + }, + { + "epoch": 0.8647470018598089, + "loss": 0.768699049949646, + "loss_ce": 0.003074073465541005, + "loss_iou": 0.369140625, + "loss_num": 0.00592041015625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 425797076, + "step": 3371 + }, + { + "epoch": 0.8650035272237543, + "grad_norm": 46.23942565917969, + "learning_rate": 5e-06, + "loss": 0.9657, + "num_input_tokens_seen": 425923092, + "step": 3372 + }, + { + "epoch": 0.8650035272237543, + "loss": 1.0020387172698975, + "loss_ce": 0.006921577267348766, + "loss_iou": 0.447265625, + "loss_num": 0.0196533203125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 425923092, + "step": 3372 + }, + { + "epoch": 0.8652600525876996, + "grad_norm": 44.11301803588867, + "learning_rate": 5e-06, + "loss": 0.9712, + "num_input_tokens_seen": 426048956, + "step": 3373 + }, + { + "epoch": 0.8652600525876996, + "loss": 1.0924731492996216, + "loss_ce": 0.0006762508419342339, + "loss_iou": 0.515625, + "loss_num": 0.01226806640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 426048956, + "step": 3373 + }, + { + "epoch": 0.8655165779516449, + "grad_norm": 57.23667907714844, + "learning_rate": 5e-06, + "loss": 0.8883, + "num_input_tokens_seen": 426175380, + "step": 3374 + }, + { + "epoch": 0.8655165779516449, + "loss": 0.7591798305511475, + "loss_ce": 0.0016114846803247929, + "loss_iou": 0.349609375, + "loss_num": 0.01153564453125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 426175380, + "step": 3374 + }, + { + "epoch": 0.8657731033155903, + "grad_norm": 44.600189208984375, + "learning_rate": 5e-06, + "loss": 0.8803, + "num_input_tokens_seen": 426301256, + "step": 3375 + }, + { + "epoch": 0.8657731033155903, + "loss": 0.8591586947441101, + "loss_ce": 0.000271968194283545, + "loss_iou": 0.40234375, + "loss_num": 0.01055908203125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 426301256, + "step": 3375 + }, + { + "epoch": 0.8660296286795357, + "grad_norm": 21.06570816040039, + "learning_rate": 5e-06, + "loss": 0.9584, + "num_input_tokens_seen": 426427156, + "step": 3376 + }, + { + "epoch": 0.8660296286795357, + "loss": 0.8860450983047485, + "loss_ce": 0.0005470075411722064, + "loss_iou": 0.40234375, + "loss_num": 0.01611328125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 426427156, + "step": 3376 + }, + { + "epoch": 0.8662861540434811, + "grad_norm": 15.724703788757324, + "learning_rate": 5e-06, + "loss": 0.8875, + "num_input_tokens_seen": 426553248, + "step": 3377 + }, + { + "epoch": 0.8662861540434811, + "loss": 0.7928207516670227, + "loss_ce": 0.0005844469415023923, + "loss_iou": 0.3671875, + "loss_num": 0.01165771484375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 426553248, + "step": 3377 + }, + { + "epoch": 0.8665426794074265, + "grad_norm": 24.42426872253418, + "learning_rate": 5e-06, + "loss": 0.9363, + "num_input_tokens_seen": 426679776, + "step": 3378 + }, + { + "epoch": 0.8665426794074265, + "loss": 1.0451979637145996, + "loss_ce": 0.001252547139301896, + "loss_iou": 0.47265625, + "loss_num": 0.0194091796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 426679776, + "step": 3378 + }, + { + "epoch": 0.8667992047713717, + "grad_norm": 42.975120544433594, + "learning_rate": 5e-06, + "loss": 0.9601, + "num_input_tokens_seen": 426805976, + "step": 3379 + }, + { + "epoch": 0.8667992047713717, + "loss": 0.9686539173126221, + "loss_ce": 0.0025894755963236094, + "loss_iou": 0.44140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 426805976, + "step": 3379 + }, + { + "epoch": 0.8670557301353171, + "grad_norm": 35.80904006958008, + "learning_rate": 5e-06, + "loss": 0.991, + "num_input_tokens_seen": 426932576, + "step": 3380 + }, + { + "epoch": 0.8670557301353171, + "loss": 1.2306230068206787, + "loss_ce": 0.0021073054522275925, + "loss_iou": 0.5390625, + "loss_num": 0.0296630859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 426932576, + "step": 3380 + }, + { + "epoch": 0.8673122554992625, + "grad_norm": 32.08248519897461, + "learning_rate": 5e-06, + "loss": 0.8841, + "num_input_tokens_seen": 427059692, + "step": 3381 + }, + { + "epoch": 0.8673122554992625, + "loss": 1.202010154724121, + "loss_ce": 0.00230314489454031, + "loss_iou": 0.5234375, + "loss_num": 0.02978515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 427059692, + "step": 3381 + }, + { + "epoch": 0.8675687808632079, + "grad_norm": 43.120994567871094, + "learning_rate": 5e-06, + "loss": 0.9758, + "num_input_tokens_seen": 427185804, + "step": 3382 + }, + { + "epoch": 0.8675687808632079, + "loss": 0.8249133825302124, + "loss_ce": 0.0014270116807892919, + "loss_iou": 0.39453125, + "loss_num": 0.00726318359375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 427185804, + "step": 3382 + }, + { + "epoch": 0.8678253062271533, + "grad_norm": 41.59384536743164, + "learning_rate": 5e-06, + "loss": 0.8439, + "num_input_tokens_seen": 427311160, + "step": 3383 + }, + { + "epoch": 0.8678253062271533, + "loss": 0.8334184885025024, + "loss_ce": 0.0023637423291802406, + "loss_iou": 0.380859375, + "loss_num": 0.0142822265625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 427311160, + "step": 3383 + }, + { + "epoch": 0.8680818315910985, + "grad_norm": 43.55419921875, + "learning_rate": 5e-06, + "loss": 0.8903, + "num_input_tokens_seen": 427437900, + "step": 3384 + }, + { + "epoch": 0.8680818315910985, + "loss": 0.7252909541130066, + "loss_ce": 0.0016581214731559157, + "loss_iou": 0.345703125, + "loss_num": 0.006500244140625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 427437900, + "step": 3384 + }, + { + "epoch": 0.8683383569550439, + "grad_norm": 56.833534240722656, + "learning_rate": 5e-06, + "loss": 1.0618, + "num_input_tokens_seen": 427564092, + "step": 3385 + }, + { + "epoch": 0.8683383569550439, + "loss": 1.1164964437484741, + "loss_ce": 0.0007737508276477456, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 427564092, + "step": 3385 + }, + { + "epoch": 0.8685948823189893, + "grad_norm": 49.374027252197266, + "learning_rate": 5e-06, + "loss": 0.8793, + "num_input_tokens_seen": 427691068, + "step": 3386 + }, + { + "epoch": 0.8685948823189893, + "loss": 0.8780579566955566, + "loss_ce": 0.001593074994161725, + "loss_iou": 0.41015625, + "loss_num": 0.0113525390625, + "loss_xval": 0.875, + "num_input_tokens_seen": 427691068, + "step": 3386 + }, + { + "epoch": 0.8688514076829347, + "grad_norm": 52.796051025390625, + "learning_rate": 5e-06, + "loss": 0.9037, + "num_input_tokens_seen": 427817216, + "step": 3387 + }, + { + "epoch": 0.8688514076829347, + "loss": 1.182614803314209, + "loss_ce": 0.001462393207475543, + "loss_iou": 0.54296875, + "loss_num": 0.0191650390625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 427817216, + "step": 3387 + }, + { + "epoch": 0.86910793304688, + "grad_norm": 37.044437408447266, + "learning_rate": 5e-06, + "loss": 0.8977, + "num_input_tokens_seen": 427943440, + "step": 3388 + }, + { + "epoch": 0.86910793304688, + "loss": 0.9546312093734741, + "loss_ce": 0.002482714131474495, + "loss_iou": 0.43359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 427943440, + "step": 3388 + }, + { + "epoch": 0.8693644584108253, + "grad_norm": 33.00278091430664, + "learning_rate": 5e-06, + "loss": 0.8285, + "num_input_tokens_seen": 428069404, + "step": 3389 + }, + { + "epoch": 0.8693644584108253, + "loss": 0.9340903759002686, + "loss_ce": 0.001473205629736185, + "loss_iou": 0.42578125, + "loss_num": 0.0164794921875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 428069404, + "step": 3389 + }, + { + "epoch": 0.8696209837747707, + "grad_norm": 38.71778869628906, + "learning_rate": 5e-06, + "loss": 0.9559, + "num_input_tokens_seen": 428195956, + "step": 3390 + }, + { + "epoch": 0.8696209837747707, + "loss": 0.98121178150177, + "loss_ce": 0.002696199109777808, + "loss_iou": 0.443359375, + "loss_num": 0.018310546875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 428195956, + "step": 3390 + }, + { + "epoch": 0.8698775091387161, + "grad_norm": 35.28464889526367, + "learning_rate": 5e-06, + "loss": 0.9349, + "num_input_tokens_seen": 428322832, + "step": 3391 + }, + { + "epoch": 0.8698775091387161, + "loss": 0.7408014535903931, + "loss_ce": 0.0005670484388247132, + "loss_iou": 0.357421875, + "loss_num": 0.0050048828125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 428322832, + "step": 3391 + }, + { + "epoch": 0.8701340345026615, + "grad_norm": 32.930442810058594, + "learning_rate": 5e-06, + "loss": 0.8576, + "num_input_tokens_seen": 428448880, + "step": 3392 + }, + { + "epoch": 0.8701340345026615, + "loss": 0.7007225751876831, + "loss_ce": 0.0005272486014291644, + "loss_iou": 0.33203125, + "loss_num": 0.00750732421875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 428448880, + "step": 3392 + }, + { + "epoch": 0.8703905598666068, + "grad_norm": 48.13477325439453, + "learning_rate": 5e-06, + "loss": 0.8885, + "num_input_tokens_seen": 428575512, + "step": 3393 + }, + { + "epoch": 0.8703905598666068, + "loss": 0.7388592958450317, + "loss_ce": 8.973574585979804e-05, + "loss_iou": 0.34375, + "loss_num": 0.0101318359375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 428575512, + "step": 3393 + }, + { + "epoch": 0.8706470852305521, + "grad_norm": 45.76422119140625, + "learning_rate": 5e-06, + "loss": 1.019, + "num_input_tokens_seen": 428701196, + "step": 3394 + }, + { + "epoch": 0.8706470852305521, + "loss": 1.2011606693267822, + "loss_ce": 0.0004772119573317468, + "loss_iou": 0.5390625, + "loss_num": 0.0238037109375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 428701196, + "step": 3394 + }, + { + "epoch": 0.8709036105944975, + "grad_norm": 51.84404754638672, + "learning_rate": 5e-06, + "loss": 0.9424, + "num_input_tokens_seen": 428826248, + "step": 3395 + }, + { + "epoch": 0.8709036105944975, + "loss": 1.1905031204223633, + "loss_ce": 0.0005618068389594555, + "loss_iou": 0.52734375, + "loss_num": 0.027587890625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 428826248, + "step": 3395 + }, + { + "epoch": 0.8711601359584429, + "grad_norm": 39.59621810913086, + "learning_rate": 5e-06, + "loss": 0.9862, + "num_input_tokens_seen": 428952468, + "step": 3396 + }, + { + "epoch": 0.8711601359584429, + "loss": 0.9531468152999878, + "loss_ce": 0.0014866769779473543, + "loss_iou": 0.435546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 428952468, + "step": 3396 + }, + { + "epoch": 0.8714166613223883, + "grad_norm": 29.36365509033203, + "learning_rate": 5e-06, + "loss": 0.8585, + "num_input_tokens_seen": 429078520, + "step": 3397 + }, + { + "epoch": 0.8714166613223883, + "loss": 0.9202775955200195, + "loss_ce": 0.0023088508751243353, + "loss_iou": 0.4140625, + "loss_num": 0.0181884765625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 429078520, + "step": 3397 + }, + { + "epoch": 0.8716731866863336, + "grad_norm": 46.7175178527832, + "learning_rate": 5e-06, + "loss": 0.9105, + "num_input_tokens_seen": 429205356, + "step": 3398 + }, + { + "epoch": 0.8716731866863336, + "loss": 0.9669725894927979, + "loss_ce": 0.004081922583281994, + "loss_iou": 0.435546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 429205356, + "step": 3398 + }, + { + "epoch": 0.871929712050279, + "grad_norm": 76.26092529296875, + "learning_rate": 5e-06, + "loss": 1.0007, + "num_input_tokens_seen": 429332452, + "step": 3399 + }, + { + "epoch": 0.871929712050279, + "loss": 0.8261502981185913, + "loss_ce": 0.0016874285647645593, + "loss_iou": 0.37890625, + "loss_num": 0.012939453125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 429332452, + "step": 3399 + }, + { + "epoch": 0.8721862374142243, + "grad_norm": 49.71019744873047, + "learning_rate": 5e-06, + "loss": 1.0018, + "num_input_tokens_seen": 429460044, + "step": 3400 + }, + { + "epoch": 0.8721862374142243, + "loss": 1.003648281097412, + "loss_ce": 0.00023035284539218992, + "loss_iou": 0.470703125, + "loss_num": 0.0125732421875, + "loss_xval": 1.0, + "num_input_tokens_seen": 429460044, + "step": 3400 + }, + { + "epoch": 0.8724427627781697, + "grad_norm": 28.346986770629883, + "learning_rate": 5e-06, + "loss": 0.7896, + "num_input_tokens_seen": 429586540, + "step": 3401 + }, + { + "epoch": 0.8724427627781697, + "loss": 0.8010172843933105, + "loss_ce": 0.0021891514770686626, + "loss_iou": 0.37890625, + "loss_num": 0.00811767578125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 429586540, + "step": 3401 + }, + { + "epoch": 0.8726992881421151, + "grad_norm": 40.20914077758789, + "learning_rate": 5e-06, + "loss": 0.8856, + "num_input_tokens_seen": 429711880, + "step": 3402 + }, + { + "epoch": 0.8726992881421151, + "loss": 0.8490281105041504, + "loss_ce": 0.0021042735315859318, + "loss_iou": 0.376953125, + "loss_num": 0.0189208984375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 429711880, + "step": 3402 + }, + { + "epoch": 0.8729558135060604, + "grad_norm": 49.848392486572266, + "learning_rate": 5e-06, + "loss": 0.86, + "num_input_tokens_seen": 429838232, + "step": 3403 + }, + { + "epoch": 0.8729558135060604, + "loss": 0.88066565990448, + "loss_ce": 0.006642218213528395, + "loss_iou": 0.404296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 429838232, + "step": 3403 + }, + { + "epoch": 0.8732123388700058, + "grad_norm": 54.214805603027344, + "learning_rate": 5e-06, + "loss": 0.8867, + "num_input_tokens_seen": 429964660, + "step": 3404 + }, + { + "epoch": 0.8732123388700058, + "loss": 1.06544828414917, + "loss_ce": 0.0014834802132099867, + "loss_iou": 0.470703125, + "loss_num": 0.0242919921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 429964660, + "step": 3404 + }, + { + "epoch": 0.8734688642339511, + "grad_norm": 41.31244659423828, + "learning_rate": 5e-06, + "loss": 1.0468, + "num_input_tokens_seen": 430091912, + "step": 3405 + }, + { + "epoch": 0.8734688642339511, + "loss": 0.9047929644584656, + "loss_ce": 0.0009844072628766298, + "loss_iou": 0.421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 430091912, + "step": 3405 + }, + { + "epoch": 0.8737253895978965, + "grad_norm": 23.201696395874023, + "learning_rate": 5e-06, + "loss": 0.9703, + "num_input_tokens_seen": 430218520, + "step": 3406 + }, + { + "epoch": 0.8737253895978965, + "loss": 1.0831247568130493, + "loss_ce": 0.0035349365789443254, + "loss_iou": 0.48828125, + "loss_num": 0.0211181640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 430218520, + "step": 3406 + }, + { + "epoch": 0.8739819149618419, + "grad_norm": 28.964981079101562, + "learning_rate": 5e-06, + "loss": 0.8775, + "num_input_tokens_seen": 430344328, + "step": 3407 + }, + { + "epoch": 0.8739819149618419, + "loss": 1.0236637592315674, + "loss_ce": 0.0012027935590595007, + "loss_iou": 0.46484375, + "loss_num": 0.018310546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 430344328, + "step": 3407 + }, + { + "epoch": 0.8742384403257872, + "grad_norm": 23.624568939208984, + "learning_rate": 5e-06, + "loss": 0.827, + "num_input_tokens_seen": 430470448, + "step": 3408 + }, + { + "epoch": 0.8742384403257872, + "loss": 0.7088004350662231, + "loss_ce": 0.00030439134570769966, + "loss_iou": 0.3359375, + "loss_num": 0.007476806640625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 430470448, + "step": 3408 + }, + { + "epoch": 0.8744949656897326, + "grad_norm": 31.541032791137695, + "learning_rate": 5e-06, + "loss": 0.9492, + "num_input_tokens_seen": 430596828, + "step": 3409 + }, + { + "epoch": 0.8744949656897326, + "loss": 1.0792485475540161, + "loss_ce": 0.0023442874662578106, + "loss_iou": 0.46484375, + "loss_num": 0.0296630859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 430596828, + "step": 3409 + }, + { + "epoch": 0.8747514910536779, + "grad_norm": 42.293907165527344, + "learning_rate": 5e-06, + "loss": 0.9031, + "num_input_tokens_seen": 430723744, + "step": 3410 + }, + { + "epoch": 0.8747514910536779, + "loss": 0.8873423337936401, + "loss_ce": 0.00013531590229831636, + "loss_iou": 0.408203125, + "loss_num": 0.0137939453125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 430723744, + "step": 3410 + }, + { + "epoch": 0.8750080164176233, + "grad_norm": 47.99876403808594, + "learning_rate": 5e-06, + "loss": 0.9119, + "num_input_tokens_seen": 430849488, + "step": 3411 + }, + { + "epoch": 0.8750080164176233, + "loss": 0.8992279767990112, + "loss_ce": 0.00030217270250432193, + "loss_iou": 0.427734375, + "loss_num": 0.00836181640625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 430849488, + "step": 3411 + }, + { + "epoch": 0.8752645417815687, + "grad_norm": 46.01728439331055, + "learning_rate": 5e-06, + "loss": 0.9561, + "num_input_tokens_seen": 430975304, + "step": 3412 + }, + { + "epoch": 0.8752645417815687, + "loss": 0.984878420829773, + "loss_ce": 0.000991659821011126, + "loss_iou": 0.45703125, + "loss_num": 0.0137939453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 430975304, + "step": 3412 + }, + { + "epoch": 0.875521067145514, + "grad_norm": 26.812801361083984, + "learning_rate": 5e-06, + "loss": 0.9157, + "num_input_tokens_seen": 431099984, + "step": 3413 + }, + { + "epoch": 0.875521067145514, + "loss": 0.67731773853302, + "loss_ce": 0.0008040264947339892, + "loss_iou": 0.310546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 431099984, + "step": 3413 + }, + { + "epoch": 0.8757775925094594, + "grad_norm": 15.79684066772461, + "learning_rate": 5e-06, + "loss": 0.7982, + "num_input_tokens_seen": 431225296, + "step": 3414 + }, + { + "epoch": 0.8757775925094594, + "loss": 0.8126412034034729, + "loss_ce": 0.0011177434353157878, + "loss_iou": 0.373046875, + "loss_num": 0.01275634765625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 431225296, + "step": 3414 + }, + { + "epoch": 0.8760341178734047, + "grad_norm": 15.332484245300293, + "learning_rate": 5e-06, + "loss": 0.9039, + "num_input_tokens_seen": 431350240, + "step": 3415 + }, + { + "epoch": 0.8760341178734047, + "loss": 0.8000078201293945, + "loss_ce": 0.0011796900071203709, + "loss_iou": 0.37109375, + "loss_num": 0.01123046875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 431350240, + "step": 3415 + }, + { + "epoch": 0.8762906432373501, + "grad_norm": 15.827414512634277, + "learning_rate": 5e-06, + "loss": 0.8506, + "num_input_tokens_seen": 431475596, + "step": 3416 + }, + { + "epoch": 0.8762906432373501, + "loss": 0.8470059633255005, + "loss_ce": 0.00032629843917675316, + "loss_iou": 0.3984375, + "loss_num": 0.0098876953125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 431475596, + "step": 3416 + }, + { + "epoch": 0.8765471686012954, + "grad_norm": 80.7283706665039, + "learning_rate": 5e-06, + "loss": 0.8849, + "num_input_tokens_seen": 431601064, + "step": 3417 + }, + { + "epoch": 0.8765471686012954, + "loss": 0.7679038643836975, + "loss_ce": 8.159335993696004e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0128173828125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 431601064, + "step": 3417 + }, + { + "epoch": 0.8768036939652408, + "grad_norm": 49.230072021484375, + "learning_rate": 5e-06, + "loss": 0.9348, + "num_input_tokens_seen": 431726572, + "step": 3418 + }, + { + "epoch": 0.8768036939652408, + "loss": 0.8360693454742432, + "loss_ce": 0.001108458498492837, + "loss_iou": 0.40234375, + "loss_num": 0.00604248046875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 431726572, + "step": 3418 + }, + { + "epoch": 0.8770602193291862, + "grad_norm": 48.07963562011719, + "learning_rate": 5e-06, + "loss": 0.8866, + "num_input_tokens_seen": 431854524, + "step": 3419 + }, + { + "epoch": 0.8770602193291862, + "loss": 0.9888432621955872, + "loss_ce": 0.0005619989824481308, + "loss_iou": 0.44921875, + "loss_num": 0.017822265625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 431854524, + "step": 3419 + }, + { + "epoch": 0.8773167446931315, + "grad_norm": 53.985050201416016, + "learning_rate": 5e-06, + "loss": 0.8852, + "num_input_tokens_seen": 431981668, + "step": 3420 + }, + { + "epoch": 0.8773167446931315, + "loss": 0.9037037491798401, + "loss_ce": 0.0003834692179225385, + "loss_iou": 0.419921875, + "loss_num": 0.0125732421875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 431981668, + "step": 3420 + }, + { + "epoch": 0.8775732700570769, + "grad_norm": 44.53376770019531, + "learning_rate": 5e-06, + "loss": 0.9842, + "num_input_tokens_seen": 432108028, + "step": 3421 + }, + { + "epoch": 0.8775732700570769, + "loss": 0.8888950347900391, + "loss_ce": 0.0002231486578239128, + "loss_iou": 0.416015625, + "loss_num": 0.01129150390625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 432108028, + "step": 3421 + }, + { + "epoch": 0.8778297954210222, + "grad_norm": 47.403507232666016, + "learning_rate": 5e-06, + "loss": 0.8334, + "num_input_tokens_seen": 432234736, + "step": 3422 + }, + { + "epoch": 0.8778297954210222, + "loss": 0.8107150793075562, + "loss_ce": 0.0009006505133584142, + "loss_iou": 0.375, + "loss_num": 0.01239013671875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 432234736, + "step": 3422 + }, + { + "epoch": 0.8780863207849676, + "grad_norm": 49.67152786254883, + "learning_rate": 5e-06, + "loss": 0.9719, + "num_input_tokens_seen": 432360092, + "step": 3423 + }, + { + "epoch": 0.8780863207849676, + "loss": 1.1102666854858398, + "loss_ce": 0.0018682765075936913, + "loss_iou": 0.49609375, + "loss_num": 0.0230712890625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 432360092, + "step": 3423 + }, + { + "epoch": 0.878342846148913, + "grad_norm": 51.985137939453125, + "learning_rate": 5e-06, + "loss": 0.8359, + "num_input_tokens_seen": 432486900, + "step": 3424 + }, + { + "epoch": 0.878342846148913, + "loss": 0.8454709053039551, + "loss_ce": 0.002697472693398595, + "loss_iou": 0.396484375, + "loss_num": 0.00970458984375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 432486900, + "step": 3424 + }, + { + "epoch": 0.8785993715128584, + "grad_norm": 59.84661102294922, + "learning_rate": 5e-06, + "loss": 0.8764, + "num_input_tokens_seen": 432612772, + "step": 3425 + }, + { + "epoch": 0.8785993715128584, + "loss": 0.926160991191864, + "loss_ce": 0.000379768869606778, + "loss_iou": 0.4375, + "loss_num": 0.010498046875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 432612772, + "step": 3425 + }, + { + "epoch": 0.8788558968768037, + "grad_norm": 32.007080078125, + "learning_rate": 5e-06, + "loss": 0.8649, + "num_input_tokens_seen": 432737984, + "step": 3426 + }, + { + "epoch": 0.8788558968768037, + "loss": 0.7906807065010071, + "loss_ce": 0.0018623414216563106, + "loss_iou": 0.357421875, + "loss_num": 0.0146484375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 432737984, + "step": 3426 + }, + { + "epoch": 0.879112422240749, + "grad_norm": 49.53152084350586, + "learning_rate": 5e-06, + "loss": 0.8325, + "num_input_tokens_seen": 432864872, + "step": 3427 + }, + { + "epoch": 0.879112422240749, + "loss": 0.9690131545066833, + "loss_ce": 0.0002631854440551251, + "loss_iou": 0.455078125, + "loss_num": 0.011474609375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 432864872, + "step": 3427 + }, + { + "epoch": 0.8793689476046944, + "grad_norm": 69.46311950683594, + "learning_rate": 5e-06, + "loss": 1.0001, + "num_input_tokens_seen": 432990672, + "step": 3428 + }, + { + "epoch": 0.8793689476046944, + "loss": 0.9777381420135498, + "loss_ce": 0.00019912939751520753, + "loss_iou": 0.451171875, + "loss_num": 0.01531982421875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 432990672, + "step": 3428 + }, + { + "epoch": 0.8796254729686398, + "grad_norm": 55.93959426879883, + "learning_rate": 5e-06, + "loss": 0.8758, + "num_input_tokens_seen": 433117724, + "step": 3429 + }, + { + "epoch": 0.8796254729686398, + "loss": 0.8691259622573853, + "loss_ce": 0.00047367080696858466, + "loss_iou": 0.41015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 433117724, + "step": 3429 + }, + { + "epoch": 0.8798819983325852, + "grad_norm": 41.18526077270508, + "learning_rate": 5e-06, + "loss": 0.7981, + "num_input_tokens_seen": 433243388, + "step": 3430 + }, + { + "epoch": 0.8798819983325852, + "loss": 0.6913057565689087, + "loss_ce": 0.00014360120985656977, + "loss_iou": 0.314453125, + "loss_num": 0.01239013671875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 433243388, + "step": 3430 + }, + { + "epoch": 0.8801385236965305, + "grad_norm": 42.80875015258789, + "learning_rate": 5e-06, + "loss": 0.944, + "num_input_tokens_seen": 433370024, + "step": 3431 + }, + { + "epoch": 0.8801385236965305, + "loss": 1.0949082374572754, + "loss_ce": 0.0026230562943965197, + "loss_iou": 0.5, + "loss_num": 0.0185546875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 433370024, + "step": 3431 + }, + { + "epoch": 0.8803950490604758, + "grad_norm": 59.134490966796875, + "learning_rate": 5e-06, + "loss": 0.8771, + "num_input_tokens_seen": 433496936, + "step": 3432 + }, + { + "epoch": 0.8803950490604758, + "loss": 0.8126804828643799, + "loss_ce": 0.0016453824937343597, + "loss_iou": 0.380859375, + "loss_num": 0.0098876953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 433496936, + "step": 3432 + }, + { + "epoch": 0.8806515744244212, + "grad_norm": 46.1022834777832, + "learning_rate": 5e-06, + "loss": 0.9529, + "num_input_tokens_seen": 433622760, + "step": 3433 + }, + { + "epoch": 0.8806515744244212, + "loss": 0.9846500158309937, + "loss_ce": 0.0002750523271970451, + "loss_iou": 0.45703125, + "loss_num": 0.01373291015625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 433622760, + "step": 3433 + }, + { + "epoch": 0.8809080997883666, + "grad_norm": 22.375951766967773, + "learning_rate": 5e-06, + "loss": 0.9513, + "num_input_tokens_seen": 433748540, + "step": 3434 + }, + { + "epoch": 0.8809080997883666, + "loss": 0.9501669406890869, + "loss_ce": 0.0038778341840952635, + "loss_iou": 0.43359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 433748540, + "step": 3434 + }, + { + "epoch": 0.881164625152312, + "grad_norm": 17.282991409301758, + "learning_rate": 5e-06, + "loss": 0.8549, + "num_input_tokens_seen": 433874084, + "step": 3435 + }, + { + "epoch": 0.881164625152312, + "loss": 0.8531233072280884, + "loss_ce": 0.0005841834936290979, + "loss_iou": 0.390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 433874084, + "step": 3435 + }, + { + "epoch": 0.8814211505162572, + "grad_norm": 28.396881103515625, + "learning_rate": 5e-06, + "loss": 0.9027, + "num_input_tokens_seen": 434000316, + "step": 3436 + }, + { + "epoch": 0.8814211505162572, + "loss": 0.8971639275550842, + "loss_ce": 0.0001912479056045413, + "loss_iou": 0.427734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 434000316, + "step": 3436 + }, + { + "epoch": 0.8816776758802026, + "grad_norm": 42.88393783569336, + "learning_rate": 5e-06, + "loss": 0.9059, + "num_input_tokens_seen": 434126808, + "step": 3437 + }, + { + "epoch": 0.8816776758802026, + "loss": 0.8238965272903442, + "loss_ce": 0.004560566507279873, + "loss_iou": 0.3828125, + "loss_num": 0.010986328125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 434126808, + "step": 3437 + }, + { + "epoch": 0.881934201244148, + "grad_norm": 57.59833526611328, + "learning_rate": 5e-06, + "loss": 0.8468, + "num_input_tokens_seen": 434252744, + "step": 3438 + }, + { + "epoch": 0.881934201244148, + "loss": 0.8683846592903137, + "loss_ce": 0.0051033878698945045, + "loss_iou": 0.396484375, + "loss_num": 0.0137939453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 434252744, + "step": 3438 + }, + { + "epoch": 0.8821907266080934, + "grad_norm": 38.1158561706543, + "learning_rate": 5e-06, + "loss": 1.1038, + "num_input_tokens_seen": 434379080, + "step": 3439 + }, + { + "epoch": 0.8821907266080934, + "loss": 1.0591309070587158, + "loss_ce": 0.001513793016783893, + "loss_iou": 0.484375, + "loss_num": 0.0179443359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 434379080, + "step": 3439 + }, + { + "epoch": 0.8824472519720388, + "grad_norm": 29.21666145324707, + "learning_rate": 5e-06, + "loss": 0.9735, + "num_input_tokens_seen": 434505172, + "step": 3440 + }, + { + "epoch": 0.8824472519720388, + "loss": 0.7836111187934875, + "loss_ce": 0.00040801268187351525, + "loss_iou": 0.365234375, + "loss_num": 0.01025390625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 434505172, + "step": 3440 + }, + { + "epoch": 0.882703777335984, + "grad_norm": 233.26312255859375, + "learning_rate": 5e-06, + "loss": 0.9878, + "num_input_tokens_seen": 434630964, + "step": 3441 + }, + { + "epoch": 0.882703777335984, + "loss": 1.0533232688903809, + "loss_ce": 0.0015654661692678928, + "loss_iou": 0.45703125, + "loss_num": 0.0269775390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 434630964, + "step": 3441 + }, + { + "epoch": 0.8829603026999294, + "grad_norm": 24.941059112548828, + "learning_rate": 5e-06, + "loss": 0.8391, + "num_input_tokens_seen": 434756576, + "step": 3442 + }, + { + "epoch": 0.8829603026999294, + "loss": 0.929535984992981, + "loss_ce": 0.002289894036948681, + "loss_iou": 0.42578125, + "loss_num": 0.01483154296875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 434756576, + "step": 3442 + }, + { + "epoch": 0.8832168280638748, + "grad_norm": 29.161884307861328, + "learning_rate": 5e-06, + "loss": 0.8047, + "num_input_tokens_seen": 434883464, + "step": 3443 + }, + { + "epoch": 0.8832168280638748, + "loss": 0.7673634886741638, + "loss_ce": 0.0012501779710873961, + "loss_iou": 0.359375, + "loss_num": 0.0091552734375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 434883464, + "step": 3443 + }, + { + "epoch": 0.8834733534278202, + "grad_norm": 38.47393798828125, + "learning_rate": 5e-06, + "loss": 0.8436, + "num_input_tokens_seen": 435008456, + "step": 3444 + }, + { + "epoch": 0.8834733534278202, + "loss": 0.7271755337715149, + "loss_ce": 0.00012476168922148645, + "loss_iou": 0.34765625, + "loss_num": 0.00653076171875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 435008456, + "step": 3444 + }, + { + "epoch": 0.8837298787917656, + "grad_norm": 46.281898498535156, + "learning_rate": 5e-06, + "loss": 0.8872, + "num_input_tokens_seen": 435135056, + "step": 3445 + }, + { + "epoch": 0.8837298787917656, + "loss": 0.8790836334228516, + "loss_ce": 0.0006656663026660681, + "loss_iou": 0.416015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 435135056, + "step": 3445 + }, + { + "epoch": 0.883986404155711, + "grad_norm": 46.61651611328125, + "learning_rate": 5e-06, + "loss": 0.8841, + "num_input_tokens_seen": 435260520, + "step": 3446 + }, + { + "epoch": 0.883986404155711, + "loss": 0.7990092039108276, + "loss_ce": 0.0006693446775898337, + "loss_iou": 0.37890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 435260520, + "step": 3446 + }, + { + "epoch": 0.8842429295196562, + "grad_norm": 33.788658142089844, + "learning_rate": 5e-06, + "loss": 0.9245, + "num_input_tokens_seen": 435386344, + "step": 3447 + }, + { + "epoch": 0.8842429295196562, + "loss": 1.005658745765686, + "loss_ce": 0.004682211205363274, + "loss_iou": 0.466796875, + "loss_num": 0.01385498046875, + "loss_xval": 1.0, + "num_input_tokens_seen": 435386344, + "step": 3447 + }, + { + "epoch": 0.8844994548836016, + "grad_norm": 47.8157844543457, + "learning_rate": 5e-06, + "loss": 0.9217, + "num_input_tokens_seen": 435512352, + "step": 3448 + }, + { + "epoch": 0.8844994548836016, + "loss": 1.0116376876831055, + "loss_ce": 0.00040726314182393253, + "loss_iou": 0.46484375, + "loss_num": 0.01611328125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 435512352, + "step": 3448 + }, + { + "epoch": 0.884755980247547, + "grad_norm": 40.95292663574219, + "learning_rate": 5e-06, + "loss": 0.9506, + "num_input_tokens_seen": 435638276, + "step": 3449 + }, + { + "epoch": 0.884755980247547, + "loss": 1.003424882888794, + "loss_ce": 0.001960084307938814, + "loss_iou": 0.453125, + "loss_num": 0.018798828125, + "loss_xval": 1.0, + "num_input_tokens_seen": 435638276, + "step": 3449 + }, + { + "epoch": 0.8850125056114924, + "grad_norm": 22.41740608215332, + "learning_rate": 5e-06, + "loss": 0.9177, + "num_input_tokens_seen": 435763928, + "step": 3450 + }, + { + "epoch": 0.8850125056114924, + "loss": 0.9013574123382568, + "loss_ce": 0.00267581082880497, + "loss_iou": 0.42578125, + "loss_num": 0.0096435546875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 435763928, + "step": 3450 + }, + { + "epoch": 0.8852690309754377, + "grad_norm": 36.64149856567383, + "learning_rate": 5e-06, + "loss": 0.8779, + "num_input_tokens_seen": 435889656, + "step": 3451 + }, + { + "epoch": 0.8852690309754377, + "loss": 0.8626788854598999, + "loss_ce": 0.0003742373373825103, + "loss_iou": 0.3984375, + "loss_num": 0.0133056640625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 435889656, + "step": 3451 + }, + { + "epoch": 0.885525556339383, + "grad_norm": 61.63970184326172, + "learning_rate": 5e-06, + "loss": 0.977, + "num_input_tokens_seen": 436015808, + "step": 3452 + }, + { + "epoch": 0.885525556339383, + "loss": 0.9916138052940369, + "loss_ce": 0.0004028629628010094, + "loss_iou": 0.4609375, + "loss_num": 0.01416015625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 436015808, + "step": 3452 + }, + { + "epoch": 0.8857820817033284, + "grad_norm": 44.72106170654297, + "learning_rate": 5e-06, + "loss": 1.0507, + "num_input_tokens_seen": 436141952, + "step": 3453 + }, + { + "epoch": 0.8857820817033284, + "loss": 1.0103474855422974, + "loss_ce": 0.0005818564677610993, + "loss_iou": 0.46484375, + "loss_num": 0.0159912109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 436141952, + "step": 3453 + }, + { + "epoch": 0.8860386070672738, + "grad_norm": 38.93879318237305, + "learning_rate": 5e-06, + "loss": 0.9543, + "num_input_tokens_seen": 436268232, + "step": 3454 + }, + { + "epoch": 0.8860386070672738, + "loss": 0.8700358867645264, + "loss_ce": 0.0028483986388891935, + "loss_iou": 0.41015625, + "loss_num": 0.009521484375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 436268232, + "step": 3454 + }, + { + "epoch": 0.8862951324312192, + "grad_norm": 40.77846145629883, + "learning_rate": 5e-06, + "loss": 1.0358, + "num_input_tokens_seen": 436394084, + "step": 3455 + }, + { + "epoch": 0.8862951324312192, + "loss": 0.9436060190200806, + "loss_ce": 0.0017114478396251798, + "loss_iou": 0.41796875, + "loss_num": 0.0213623046875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 436394084, + "step": 3455 + }, + { + "epoch": 0.8865516577951645, + "grad_norm": 54.384544372558594, + "learning_rate": 5e-06, + "loss": 0.9185, + "num_input_tokens_seen": 436520584, + "step": 3456 + }, + { + "epoch": 0.8865516577951645, + "loss": 0.839960515499115, + "loss_ce": 0.0010933056473731995, + "loss_iou": 0.400390625, + "loss_num": 0.007537841796875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 436520584, + "step": 3456 + }, + { + "epoch": 0.8868081831591098, + "grad_norm": 49.29331588745117, + "learning_rate": 5e-06, + "loss": 0.9414, + "num_input_tokens_seen": 436646308, + "step": 3457 + }, + { + "epoch": 0.8868081831591098, + "loss": 1.080345630645752, + "loss_ce": 0.0012440603459253907, + "loss_iou": 0.5, + "loss_num": 0.015380859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 436646308, + "step": 3457 + }, + { + "epoch": 0.8870647085230552, + "grad_norm": 53.06224060058594, + "learning_rate": 5e-06, + "loss": 0.9215, + "num_input_tokens_seen": 436772312, + "step": 3458 + }, + { + "epoch": 0.8870647085230552, + "loss": 1.0571246147155762, + "loss_ce": 0.0009722586255520582, + "loss_iou": 0.48046875, + "loss_num": 0.0189208984375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 436772312, + "step": 3458 + }, + { + "epoch": 0.8873212338870006, + "grad_norm": 56.29994583129883, + "learning_rate": 5e-06, + "loss": 0.9182, + "num_input_tokens_seen": 436899012, + "step": 3459 + }, + { + "epoch": 0.8873212338870006, + "loss": 0.8147017955780029, + "loss_ce": 0.00024863381986506283, + "loss_iou": 0.37890625, + "loss_num": 0.0115966796875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 436899012, + "step": 3459 + }, + { + "epoch": 0.887577759250946, + "grad_norm": 62.93098831176758, + "learning_rate": 5e-06, + "loss": 1.0228, + "num_input_tokens_seen": 437025312, + "step": 3460 + }, + { + "epoch": 0.887577759250946, + "loss": 0.9808425903320312, + "loss_ce": 0.0015944740734994411, + "loss_iou": 0.443359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 437025312, + "step": 3460 + }, + { + "epoch": 0.8878342846148913, + "grad_norm": 60.884056091308594, + "learning_rate": 5e-06, + "loss": 1.0027, + "num_input_tokens_seen": 437152084, + "step": 3461 + }, + { + "epoch": 0.8878342846148913, + "loss": 1.1948078870773315, + "loss_ce": 0.0004719930002465844, + "loss_iou": 0.5390625, + "loss_num": 0.0240478515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 437152084, + "step": 3461 + }, + { + "epoch": 0.8880908099788366, + "grad_norm": 59.38124084472656, + "learning_rate": 5e-06, + "loss": 0.952, + "num_input_tokens_seen": 437277848, + "step": 3462 + }, + { + "epoch": 0.8880908099788366, + "loss": 0.9059023261070251, + "loss_ce": 0.0006288869772106409, + "loss_iou": 0.431640625, + "loss_num": 0.008544921875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 437277848, + "step": 3462 + }, + { + "epoch": 0.888347335342782, + "grad_norm": 38.98711013793945, + "learning_rate": 5e-06, + "loss": 0.8381, + "num_input_tokens_seen": 437402532, + "step": 3463 + }, + { + "epoch": 0.888347335342782, + "loss": 0.8584575057029724, + "loss_ce": 0.0007915201713331044, + "loss_iou": 0.408203125, + "loss_num": 0.0079345703125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 437402532, + "step": 3463 + }, + { + "epoch": 0.8886038607067274, + "grad_norm": 27.732324600219727, + "learning_rate": 5e-06, + "loss": 0.9388, + "num_input_tokens_seen": 437528956, + "step": 3464 + }, + { + "epoch": 0.8886038607067274, + "loss": 0.9284608960151672, + "loss_ce": 0.005853438284248114, + "loss_iou": 0.4140625, + "loss_num": 0.0186767578125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 437528956, + "step": 3464 + }, + { + "epoch": 0.8888603860706727, + "grad_norm": 50.30615997314453, + "learning_rate": 5e-06, + "loss": 0.8218, + "num_input_tokens_seen": 437655220, + "step": 3465 + }, + { + "epoch": 0.8888603860706727, + "loss": 0.8709135055541992, + "loss_ce": 0.0003080573515035212, + "loss_iou": 0.39453125, + "loss_num": 0.0166015625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 437655220, + "step": 3465 + }, + { + "epoch": 0.8891169114346181, + "grad_norm": 51.125362396240234, + "learning_rate": 5e-06, + "loss": 0.9267, + "num_input_tokens_seen": 437781580, + "step": 3466 + }, + { + "epoch": 0.8891169114346181, + "loss": 0.846234142780304, + "loss_ce": 0.0015075721312314272, + "loss_iou": 0.3984375, + "loss_num": 0.009521484375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 437781580, + "step": 3466 + }, + { + "epoch": 0.8893734367985635, + "grad_norm": 43.50301742553711, + "learning_rate": 5e-06, + "loss": 0.9021, + "num_input_tokens_seen": 437908052, + "step": 3467 + }, + { + "epoch": 0.8893734367985635, + "loss": 1.0447622537612915, + "loss_ce": 0.005211474373936653, + "loss_iou": 0.482421875, + "loss_num": 0.01470947265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 437908052, + "step": 3467 + }, + { + "epoch": 0.8896299621625088, + "grad_norm": 50.587364196777344, + "learning_rate": 5e-06, + "loss": 0.8495, + "num_input_tokens_seen": 438034808, + "step": 3468 + }, + { + "epoch": 0.8896299621625088, + "loss": 0.9229599833488464, + "loss_ce": 0.001817358541302383, + "loss_iou": 0.43359375, + "loss_num": 0.0108642578125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 438034808, + "step": 3468 + }, + { + "epoch": 0.8898864875264542, + "grad_norm": 52.134674072265625, + "learning_rate": 5e-06, + "loss": 0.9262, + "num_input_tokens_seen": 438161648, + "step": 3469 + }, + { + "epoch": 0.8898864875264542, + "loss": 1.0784614086151123, + "loss_ce": 0.0013129300205036998, + "loss_iou": 0.5, + "loss_num": 0.015869140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 438161648, + "step": 3469 + }, + { + "epoch": 0.8901430128903995, + "grad_norm": 52.15785217285156, + "learning_rate": 5e-06, + "loss": 0.9972, + "num_input_tokens_seen": 438287680, + "step": 3470 + }, + { + "epoch": 0.8901430128903995, + "loss": 0.8080508708953857, + "loss_ce": 0.00043369480408728123, + "loss_iou": 0.390625, + "loss_num": 0.00555419921875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 438287680, + "step": 3470 + }, + { + "epoch": 0.8903995382543449, + "grad_norm": 46.73903274536133, + "learning_rate": 5e-06, + "loss": 0.9506, + "num_input_tokens_seen": 438414072, + "step": 3471 + }, + { + "epoch": 0.8903995382543449, + "loss": 0.8236591815948486, + "loss_ce": 0.00041702031739987433, + "loss_iou": 0.39453125, + "loss_num": 0.007080078125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 438414072, + "step": 3471 + }, + { + "epoch": 0.8906560636182903, + "grad_norm": 91.40861511230469, + "learning_rate": 5e-06, + "loss": 0.9469, + "num_input_tokens_seen": 438541444, + "step": 3472 + }, + { + "epoch": 0.8906560636182903, + "loss": 1.0221894979476929, + "loss_ce": 0.002169941784814, + "loss_iou": 0.451171875, + "loss_num": 0.0240478515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 438541444, + "step": 3472 + }, + { + "epoch": 0.8909125889822356, + "grad_norm": 42.283748626708984, + "learning_rate": 5e-06, + "loss": 1.0101, + "num_input_tokens_seen": 438667676, + "step": 3473 + }, + { + "epoch": 0.8909125889822356, + "loss": 1.1288270950317383, + "loss_ce": 0.002362252678722143, + "loss_iou": 0.51171875, + "loss_num": 0.020751953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 438667676, + "step": 3473 + }, + { + "epoch": 0.891169114346181, + "grad_norm": 28.7772274017334, + "learning_rate": 5e-06, + "loss": 0.8939, + "num_input_tokens_seen": 438793980, + "step": 3474 + }, + { + "epoch": 0.891169114346181, + "loss": 1.1132316589355469, + "loss_ce": 0.0028801592998206615, + "loss_iou": 0.48828125, + "loss_num": 0.0263671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 438793980, + "step": 3474 + }, + { + "epoch": 0.8914256397101263, + "grad_norm": 40.3843879699707, + "learning_rate": 5e-06, + "loss": 0.9598, + "num_input_tokens_seen": 438921068, + "step": 3475 + }, + { + "epoch": 0.8914256397101263, + "loss": 0.8080883026123047, + "loss_ce": 0.000959338212851435, + "loss_iou": 0.37890625, + "loss_num": 0.009765625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 438921068, + "step": 3475 + }, + { + "epoch": 0.8916821650740717, + "grad_norm": 52.686180114746094, + "learning_rate": 5e-06, + "loss": 0.8949, + "num_input_tokens_seen": 439047024, + "step": 3476 + }, + { + "epoch": 0.8916821650740717, + "loss": 1.0003409385681152, + "loss_ce": 0.0008292689453810453, + "loss_iou": 0.45703125, + "loss_num": 0.01708984375, + "loss_xval": 1.0, + "num_input_tokens_seen": 439047024, + "step": 3476 + }, + { + "epoch": 0.8919386904380171, + "grad_norm": 48.27030563354492, + "learning_rate": 5e-06, + "loss": 0.9501, + "num_input_tokens_seen": 439173668, + "step": 3477 + }, + { + "epoch": 0.8919386904380171, + "loss": 0.9072990417480469, + "loss_ce": 0.0010490596760064363, + "loss_iou": 0.41796875, + "loss_num": 0.01416015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 439173668, + "step": 3477 + }, + { + "epoch": 0.8921952158019624, + "grad_norm": 56.81381607055664, + "learning_rate": 5e-06, + "loss": 0.8105, + "num_input_tokens_seen": 439299416, + "step": 3478 + }, + { + "epoch": 0.8921952158019624, + "loss": 0.822077751159668, + "loss_ce": 0.0003003695164807141, + "loss_iou": 0.390625, + "loss_num": 0.0079345703125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 439299416, + "step": 3478 + }, + { + "epoch": 0.8924517411659078, + "grad_norm": 36.58035659790039, + "learning_rate": 5e-06, + "loss": 0.85, + "num_input_tokens_seen": 439425120, + "step": 3479 + }, + { + "epoch": 0.8924517411659078, + "loss": 0.8668330907821655, + "loss_ce": 0.00013380752352532, + "loss_iou": 0.38671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 439425120, + "step": 3479 + }, + { + "epoch": 0.8927082665298531, + "grad_norm": 37.54542541503906, + "learning_rate": 5e-06, + "loss": 0.956, + "num_input_tokens_seen": 439551672, + "step": 3480 + }, + { + "epoch": 0.8927082665298531, + "loss": 1.0737719535827637, + "loss_ce": 0.0019945912063121796, + "loss_iou": 0.486328125, + "loss_num": 0.0196533203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 439551672, + "step": 3480 + }, + { + "epoch": 0.8929647918937985, + "grad_norm": 43.92566680908203, + "learning_rate": 5e-06, + "loss": 0.8286, + "num_input_tokens_seen": 439677248, + "step": 3481 + }, + { + "epoch": 0.8929647918937985, + "loss": 0.9804304242134094, + "loss_ce": 0.0014264786150306463, + "loss_iou": 0.431640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 439677248, + "step": 3481 + }, + { + "epoch": 0.8932213172577439, + "grad_norm": 100.75383758544922, + "learning_rate": 5e-06, + "loss": 0.8593, + "num_input_tokens_seen": 439803464, + "step": 3482 + }, + { + "epoch": 0.8932213172577439, + "loss": 0.7051931619644165, + "loss_ce": 0.00035918079083785415, + "loss_iou": 0.330078125, + "loss_num": 0.0089111328125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 439803464, + "step": 3482 + }, + { + "epoch": 0.8934778426216892, + "grad_norm": 48.23073959350586, + "learning_rate": 5e-06, + "loss": 0.9091, + "num_input_tokens_seen": 439929676, + "step": 3483 + }, + { + "epoch": 0.8934778426216892, + "loss": 0.8114031553268433, + "loss_ce": 0.00012387189781293273, + "loss_iou": 0.361328125, + "loss_num": 0.0177001953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 439929676, + "step": 3483 + }, + { + "epoch": 0.8937343679856345, + "grad_norm": 40.21246337890625, + "learning_rate": 5e-06, + "loss": 0.9172, + "num_input_tokens_seen": 440056060, + "step": 3484 + }, + { + "epoch": 0.8937343679856345, + "loss": 0.9171539545059204, + "loss_ce": 0.00016176214558072388, + "loss_iou": 0.423828125, + "loss_num": 0.0135498046875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 440056060, + "step": 3484 + }, + { + "epoch": 0.8939908933495799, + "grad_norm": 35.6041374206543, + "learning_rate": 5e-06, + "loss": 0.9138, + "num_input_tokens_seen": 440182060, + "step": 3485 + }, + { + "epoch": 0.8939908933495799, + "loss": 0.8882452249526978, + "loss_ce": 0.0003057711583096534, + "loss_iou": 0.412109375, + "loss_num": 0.012939453125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 440182060, + "step": 3485 + }, + { + "epoch": 0.8942474187135253, + "grad_norm": 32.432220458984375, + "learning_rate": 5e-06, + "loss": 0.8823, + "num_input_tokens_seen": 440307220, + "step": 3486 + }, + { + "epoch": 0.8942474187135253, + "loss": 1.074488878250122, + "loss_ce": 0.0017349713016301394, + "loss_iou": 0.49609375, + "loss_num": 0.01611328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 440307220, + "step": 3486 + }, + { + "epoch": 0.8945039440774707, + "grad_norm": 39.51986312866211, + "learning_rate": 5e-06, + "loss": 1.0157, + "num_input_tokens_seen": 440432000, + "step": 3487 + }, + { + "epoch": 0.8945039440774707, + "loss": 0.9156252145767212, + "loss_ce": 0.004492382984608412, + "loss_iou": 0.42578125, + "loss_num": 0.0120849609375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 440432000, + "step": 3487 + }, + { + "epoch": 0.8947604694414161, + "grad_norm": 41.696922302246094, + "learning_rate": 5e-06, + "loss": 0.9143, + "num_input_tokens_seen": 440558472, + "step": 3488 + }, + { + "epoch": 0.8947604694414161, + "loss": 0.8949086666107178, + "loss_ce": 0.0030630186665803194, + "loss_iou": 0.396484375, + "loss_num": 0.01953125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 440558472, + "step": 3488 + }, + { + "epoch": 0.8950169948053613, + "grad_norm": 53.55394744873047, + "learning_rate": 5e-06, + "loss": 1.0735, + "num_input_tokens_seen": 440684684, + "step": 3489 + }, + { + "epoch": 0.8950169948053613, + "loss": 1.0796884298324585, + "loss_ce": 0.003028303850442171, + "loss_iou": 0.47265625, + "loss_num": 0.0262451171875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 440684684, + "step": 3489 + }, + { + "epoch": 0.8952735201693067, + "grad_norm": 54.80255126953125, + "learning_rate": 5e-06, + "loss": 1.046, + "num_input_tokens_seen": 440809900, + "step": 3490 + }, + { + "epoch": 0.8952735201693067, + "loss": 1.0688905715942383, + "loss_ce": 0.0002870369062293321, + "loss_iou": 0.474609375, + "loss_num": 0.0238037109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 440809900, + "step": 3490 + }, + { + "epoch": 0.8955300455332521, + "grad_norm": 63.48841857910156, + "learning_rate": 5e-06, + "loss": 0.9201, + "num_input_tokens_seen": 440937392, + "step": 3491 + }, + { + "epoch": 0.8955300455332521, + "loss": 0.8465408086776733, + "loss_ce": 0.0013259402476251125, + "loss_iou": 0.392578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 440937392, + "step": 3491 + }, + { + "epoch": 0.8957865708971975, + "grad_norm": 53.399166107177734, + "learning_rate": 5e-06, + "loss": 0.953, + "num_input_tokens_seen": 441064292, + "step": 3492 + }, + { + "epoch": 0.8957865708971975, + "loss": 0.7807327508926392, + "loss_ce": 0.0004593436897266656, + "loss_iou": 0.375, + "loss_num": 0.00616455078125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 441064292, + "step": 3492 + }, + { + "epoch": 0.8960430962611429, + "grad_norm": 31.022838592529297, + "learning_rate": 5e-06, + "loss": 0.8332, + "num_input_tokens_seen": 441189936, + "step": 3493 + }, + { + "epoch": 0.8960430962611429, + "loss": 0.8196207284927368, + "loss_ce": 0.00028479506727308035, + "loss_iou": 0.390625, + "loss_num": 0.0076904296875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 441189936, + "step": 3493 + }, + { + "epoch": 0.8962996216250881, + "grad_norm": 58.422821044921875, + "learning_rate": 5e-06, + "loss": 0.8987, + "num_input_tokens_seen": 441316788, + "step": 3494 + }, + { + "epoch": 0.8962996216250881, + "loss": 0.831887423992157, + "loss_ce": 0.0027858330868184566, + "loss_iou": 0.390625, + "loss_num": 0.00921630859375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 441316788, + "step": 3494 + }, + { + "epoch": 0.8965561469890335, + "grad_norm": 34.17292022705078, + "learning_rate": 5e-06, + "loss": 0.9805, + "num_input_tokens_seen": 441441892, + "step": 3495 + }, + { + "epoch": 0.8965561469890335, + "loss": 1.2646539211273193, + "loss_ce": 0.0009820564882829785, + "loss_iou": 0.55078125, + "loss_num": 0.032470703125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 441441892, + "step": 3495 + }, + { + "epoch": 0.8968126723529789, + "grad_norm": 72.06352233886719, + "learning_rate": 5e-06, + "loss": 0.8735, + "num_input_tokens_seen": 441568816, + "step": 3496 + }, + { + "epoch": 0.8968126723529789, + "loss": 0.8789844512939453, + "loss_ce": 7.81727212597616e-05, + "loss_iou": 0.421875, + "loss_num": 0.006622314453125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 441568816, + "step": 3496 + }, + { + "epoch": 0.8970691977169243, + "grad_norm": 42.339481353759766, + "learning_rate": 5e-06, + "loss": 0.8974, + "num_input_tokens_seen": 441694836, + "step": 3497 + }, + { + "epoch": 0.8970691977169243, + "loss": 0.8050779700279236, + "loss_ce": 0.0003904563491232693, + "loss_iou": 0.3828125, + "loss_num": 0.007598876953125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 441694836, + "step": 3497 + }, + { + "epoch": 0.8973257230808697, + "grad_norm": 46.16578674316406, + "learning_rate": 5e-06, + "loss": 0.8511, + "num_input_tokens_seen": 441820296, + "step": 3498 + }, + { + "epoch": 0.8973257230808697, + "loss": 0.8251259922981262, + "loss_ce": 0.0004189509782008827, + "loss_iou": 0.396484375, + "loss_num": 0.00604248046875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 441820296, + "step": 3498 + }, + { + "epoch": 0.8975822484448149, + "grad_norm": 46.324771881103516, + "learning_rate": 5e-06, + "loss": 0.9482, + "num_input_tokens_seen": 441946708, + "step": 3499 + }, + { + "epoch": 0.8975822484448149, + "loss": 0.9134190082550049, + "loss_ce": 0.0010655266232788563, + "loss_iou": 0.42578125, + "loss_num": 0.01220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 441946708, + "step": 3499 + }, + { + "epoch": 0.8978387738087603, + "grad_norm": 87.24795532226562, + "learning_rate": 5e-06, + "loss": 0.9858, + "num_input_tokens_seen": 442072764, + "step": 3500 + }, + { + "epoch": 0.8978387738087603, + "eval_icons_CIoU": 0.2654718831181526, + "eval_icons_GIoU": 0.22349807620048523, + "eval_icons_IoU": 0.44777700304985046, + "eval_icons_MAE_all": 0.027041063643991947, + "eval_icons_MAE_h": 0.03446871228516102, + "eval_icons_MAE_w": 0.06065436080098152, + "eval_icons_MAE_x_boxes": 0.05538228526711464, + "eval_icons_MAE_y_boxes": 0.035673145204782486, + "eval_icons_NUM_probability": 0.9998672604560852, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 1.6387982368469238, + "eval_icons_loss_ce": 4.188341881672386e-05, + "eval_icons_loss_iou": 0.75274658203125, + "eval_icons_loss_num": 0.029598236083984375, + "eval_icons_loss_xval": 1.65380859375, + "eval_icons_runtime": 63.3751, + "eval_icons_samples_per_second": 0.789, + "eval_icons_steps_per_second": 0.032, + "num_input_tokens_seen": 442072764, + "step": 3500 + }, + { + "epoch": 0.8978387738087603, + "eval_screenspot_CIoU": 0.13605733960866928, + "eval_screenspot_GIoU": 0.12390563388665517, + "eval_screenspot_IoU": 0.3017067611217499, + "eval_screenspot_MAE_all": 0.07498623803257942, + "eval_screenspot_MAE_h": 0.06864214067657788, + "eval_screenspot_MAE_w": 0.1201626608769099, + "eval_screenspot_MAE_x_boxes": 0.09734771897395451, + "eval_screenspot_MAE_y_boxes": 0.0548208753267924, + "eval_screenspot_NUM_probability": 0.9999479254086813, + "eval_screenspot_inside_bbox": 0.6254166762034098, + "eval_screenspot_loss": 2.1713383197784424, + "eval_screenspot_loss_ce": 0.002350811652528743, + "eval_screenspot_loss_iou": 0.9013671875, + "eval_screenspot_loss_num": 0.07902272542317708, + "eval_screenspot_loss_xval": 2.1969401041666665, + "eval_screenspot_runtime": 110.3971, + "eval_screenspot_samples_per_second": 0.806, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 442072764, + "step": 3500 + }, + { + "epoch": 0.8978387738087603, + "loss": 2.1443264484405518, + "loss_ce": 0.0017482805997133255, + "loss_iou": 0.89453125, + "loss_num": 0.07080078125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 442072764, + "step": 3500 + }, + { + "epoch": 0.8980952991727057, + "grad_norm": 46.38233947753906, + "learning_rate": 5e-06, + "loss": 0.8128, + "num_input_tokens_seen": 442199348, + "step": 3501 + }, + { + "epoch": 0.8980952991727057, + "loss": 0.6705584526062012, + "loss_ce": 0.00014835037291049957, + "loss_iou": 0.318359375, + "loss_num": 0.006805419921875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 442199348, + "step": 3501 + }, + { + "epoch": 0.8983518245366511, + "grad_norm": 53.78159713745117, + "learning_rate": 5e-06, + "loss": 0.8524, + "num_input_tokens_seen": 442325216, + "step": 3502 + }, + { + "epoch": 0.8983518245366511, + "loss": 0.8385587334632874, + "loss_ce": 0.0006681042723357677, + "loss_iou": 0.384765625, + "loss_num": 0.01361083984375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 442325216, + "step": 3502 + }, + { + "epoch": 0.8986083499005965, + "grad_norm": 44.035343170166016, + "learning_rate": 5e-06, + "loss": 1.0783, + "num_input_tokens_seen": 442450048, + "step": 3503 + }, + { + "epoch": 0.8986083499005965, + "loss": 1.1966700553894043, + "loss_ce": 0.0028223723638802767, + "loss_iou": 0.51953125, + "loss_num": 0.03173828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 442450048, + "step": 3503 + }, + { + "epoch": 0.8988648752645417, + "grad_norm": 23.796772003173828, + "learning_rate": 5e-06, + "loss": 0.875, + "num_input_tokens_seen": 442575464, + "step": 3504 + }, + { + "epoch": 0.8988648752645417, + "loss": 0.6904147267341614, + "loss_ce": 0.0009616065653972328, + "loss_iou": 0.33203125, + "loss_num": 0.005157470703125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 442575464, + "step": 3504 + }, + { + "epoch": 0.8991214006284871, + "grad_norm": 23.05321502685547, + "learning_rate": 5e-06, + "loss": 0.8855, + "num_input_tokens_seen": 442701156, + "step": 3505 + }, + { + "epoch": 0.8991214006284871, + "loss": 0.9829712510108948, + "loss_ce": 0.0003051835810765624, + "loss_iou": 0.431640625, + "loss_num": 0.0240478515625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 442701156, + "step": 3505 + }, + { + "epoch": 0.8993779259924325, + "grad_norm": 37.0819091796875, + "learning_rate": 5e-06, + "loss": 0.9167, + "num_input_tokens_seen": 442827108, + "step": 3506 + }, + { + "epoch": 0.8993779259924325, + "loss": 0.9251057505607605, + "loss_ce": 0.001765865832567215, + "loss_iou": 0.4296875, + "loss_num": 0.0123291015625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 442827108, + "step": 3506 + }, + { + "epoch": 0.8996344513563779, + "grad_norm": 45.012508392333984, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 442953696, + "step": 3507 + }, + { + "epoch": 0.8996344513563779, + "loss": 1.1885885000228882, + "loss_ce": 0.00011190435907337815, + "loss_iou": 0.54296875, + "loss_num": 0.0205078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 442953696, + "step": 3507 + }, + { + "epoch": 0.8998909767203233, + "grad_norm": 52.82979202270508, + "learning_rate": 5e-06, + "loss": 0.8228, + "num_input_tokens_seen": 443080104, + "step": 3508 + }, + { + "epoch": 0.8998909767203233, + "loss": 0.8294578790664673, + "loss_ce": 0.005361170042306185, + "loss_iou": 0.37109375, + "loss_num": 0.015869140625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 443080104, + "step": 3508 + }, + { + "epoch": 0.9001475020842686, + "grad_norm": 61.6356315612793, + "learning_rate": 5e-06, + "loss": 0.8873, + "num_input_tokens_seen": 443207104, + "step": 3509 + }, + { + "epoch": 0.9001475020842686, + "loss": 0.8206233978271484, + "loss_ce": 0.0007991431630216539, + "loss_iou": 0.384765625, + "loss_num": 0.01031494140625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 443207104, + "step": 3509 + }, + { + "epoch": 0.9004040274482139, + "grad_norm": 59.79911804199219, + "learning_rate": 5e-06, + "loss": 0.9424, + "num_input_tokens_seen": 443334084, + "step": 3510 + }, + { + "epoch": 0.9004040274482139, + "loss": 1.0409942865371704, + "loss_ce": 0.007791214156895876, + "loss_iou": 0.462890625, + "loss_num": 0.021240234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 443334084, + "step": 3510 + }, + { + "epoch": 0.9006605528121593, + "grad_norm": 57.79722213745117, + "learning_rate": 5e-06, + "loss": 0.8694, + "num_input_tokens_seen": 443460344, + "step": 3511 + }, + { + "epoch": 0.9006605528121593, + "loss": 0.8211153745651245, + "loss_ce": 0.0008028781157918274, + "loss_iou": 0.38671875, + "loss_num": 0.009033203125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 443460344, + "step": 3511 + }, + { + "epoch": 0.9009170781761047, + "grad_norm": 97.26219177246094, + "learning_rate": 5e-06, + "loss": 0.9486, + "num_input_tokens_seen": 443585716, + "step": 3512 + }, + { + "epoch": 0.9009170781761047, + "loss": 1.117616891860962, + "loss_ce": 0.00042932823998853564, + "loss_iou": 0.515625, + "loss_num": 0.0177001953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 443585716, + "step": 3512 + }, + { + "epoch": 0.90117360354005, + "grad_norm": 47.98212814331055, + "learning_rate": 5e-06, + "loss": 0.9914, + "num_input_tokens_seen": 443711520, + "step": 3513 + }, + { + "epoch": 0.90117360354005, + "loss": 0.7896994948387146, + "loss_ce": 0.0001486846449552104, + "loss_iou": 0.375, + "loss_num": 0.00738525390625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 443711520, + "step": 3513 + }, + { + "epoch": 0.9014301289039954, + "grad_norm": 45.594024658203125, + "learning_rate": 5e-06, + "loss": 0.9398, + "num_input_tokens_seen": 443837168, + "step": 3514 + }, + { + "epoch": 0.9014301289039954, + "loss": 0.8663794994354248, + "loss_ce": 0.0011451354948803782, + "loss_iou": 0.40234375, + "loss_num": 0.01171875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 443837168, + "step": 3514 + }, + { + "epoch": 0.9016866542679407, + "grad_norm": 44.8537483215332, + "learning_rate": 5e-06, + "loss": 0.9758, + "num_input_tokens_seen": 443964116, + "step": 3515 + }, + { + "epoch": 0.9016866542679407, + "loss": 0.9496171474456787, + "loss_ce": 0.003816381096839905, + "loss_iou": 0.431640625, + "loss_num": 0.016357421875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 443964116, + "step": 3515 + }, + { + "epoch": 0.9019431796318861, + "grad_norm": 53.01325225830078, + "learning_rate": 5e-06, + "loss": 0.8896, + "num_input_tokens_seen": 444090036, + "step": 3516 + }, + { + "epoch": 0.9019431796318861, + "loss": 0.8805411458015442, + "loss_ce": 0.0011466338764876127, + "loss_iou": 0.41015625, + "loss_num": 0.01214599609375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 444090036, + "step": 3516 + }, + { + "epoch": 0.9021997049958315, + "grad_norm": 51.024559020996094, + "learning_rate": 5e-06, + "loss": 1.0241, + "num_input_tokens_seen": 444216560, + "step": 3517 + }, + { + "epoch": 0.9021997049958315, + "loss": 1.1311421394348145, + "loss_ce": 0.006630385294556618, + "loss_iou": 0.50390625, + "loss_num": 0.023681640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 444216560, + "step": 3517 + }, + { + "epoch": 0.9024562303597768, + "grad_norm": 44.28757858276367, + "learning_rate": 5e-06, + "loss": 0.8767, + "num_input_tokens_seen": 444342192, + "step": 3518 + }, + { + "epoch": 0.9024562303597768, + "loss": 0.9214938879013062, + "loss_ce": 0.0010837230365723372, + "loss_iou": 0.4296875, + "loss_num": 0.0120849609375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 444342192, + "step": 3518 + }, + { + "epoch": 0.9027127557237222, + "grad_norm": 52.65424346923828, + "learning_rate": 5e-06, + "loss": 0.8605, + "num_input_tokens_seen": 444468660, + "step": 3519 + }, + { + "epoch": 0.9027127557237222, + "loss": 0.9345363974571228, + "loss_ce": 0.003384036710485816, + "loss_iou": 0.42578125, + "loss_num": 0.0162353515625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 444468660, + "step": 3519 + }, + { + "epoch": 0.9029692810876675, + "grad_norm": 58.502655029296875, + "learning_rate": 5e-06, + "loss": 0.8722, + "num_input_tokens_seen": 444595660, + "step": 3520 + }, + { + "epoch": 0.9029692810876675, + "loss": 0.9721913933753967, + "loss_ce": 0.0019765368197113276, + "loss_iou": 0.44140625, + "loss_num": 0.0179443359375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 444595660, + "step": 3520 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 46.94337463378906, + "learning_rate": 5e-06, + "loss": 0.8831, + "num_input_tokens_seen": 444721632, + "step": 3521 + }, + { + "epoch": 0.9032258064516129, + "loss": 0.6785579919815063, + "loss_ce": 0.004241609014570713, + "loss_iou": 0.326171875, + "loss_num": 0.00445556640625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 444721632, + "step": 3521 + }, + { + "epoch": 0.9034823318155583, + "grad_norm": 45.724517822265625, + "learning_rate": 5e-06, + "loss": 0.914, + "num_input_tokens_seen": 444847820, + "step": 3522 + }, + { + "epoch": 0.9034823318155583, + "loss": 0.9274861216545105, + "loss_ce": 0.001704866299405694, + "loss_iou": 0.439453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 444847820, + "step": 3522 + }, + { + "epoch": 0.9037388571795036, + "grad_norm": 42.25807189941406, + "learning_rate": 5e-06, + "loss": 0.9332, + "num_input_tokens_seen": 444973040, + "step": 3523 + }, + { + "epoch": 0.9037388571795036, + "loss": 0.9668737649917603, + "loss_ce": 0.002518265275284648, + "loss_iou": 0.431640625, + "loss_num": 0.02001953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 444973040, + "step": 3523 + }, + { + "epoch": 0.903995382543449, + "grad_norm": 25.604503631591797, + "learning_rate": 5e-06, + "loss": 0.9098, + "num_input_tokens_seen": 445099168, + "step": 3524 + }, + { + "epoch": 0.903995382543449, + "loss": 0.9530107378959656, + "loss_ce": 0.005745092872530222, + "loss_iou": 0.416015625, + "loss_num": 0.0228271484375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 445099168, + "step": 3524 + }, + { + "epoch": 0.9042519079073943, + "grad_norm": 44.05077362060547, + "learning_rate": 5e-06, + "loss": 0.9523, + "num_input_tokens_seen": 445223668, + "step": 3525 + }, + { + "epoch": 0.9042519079073943, + "loss": 0.9463621377944946, + "loss_ce": 0.002026232425123453, + "loss_iou": 0.427734375, + "loss_num": 0.018310546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 445223668, + "step": 3525 + }, + { + "epoch": 0.9045084332713397, + "grad_norm": 26.812744140625, + "learning_rate": 5e-06, + "loss": 0.9285, + "num_input_tokens_seen": 445349012, + "step": 3526 + }, + { + "epoch": 0.9045084332713397, + "loss": 0.8913105130195618, + "loss_ce": 0.0011737886816263199, + "loss_iou": 0.412109375, + "loss_num": 0.01312255859375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 445349012, + "step": 3526 + }, + { + "epoch": 0.904764958635285, + "grad_norm": 38.29542541503906, + "learning_rate": 5e-06, + "loss": 0.931, + "num_input_tokens_seen": 445476084, + "step": 3527 + }, + { + "epoch": 0.904764958635285, + "loss": 0.7823600769042969, + "loss_ce": 0.0006218089838512242, + "loss_iou": 0.373046875, + "loss_num": 0.00738525390625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 445476084, + "step": 3527 + }, + { + "epoch": 0.9050214839992304, + "grad_norm": 56.2513542175293, + "learning_rate": 5e-06, + "loss": 0.9113, + "num_input_tokens_seen": 445602484, + "step": 3528 + }, + { + "epoch": 0.9050214839992304, + "loss": 0.833781361579895, + "loss_ce": 0.0022383553441613913, + "loss_iou": 0.3828125, + "loss_num": 0.0133056640625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 445602484, + "step": 3528 + }, + { + "epoch": 0.9052780093631758, + "grad_norm": 47.22053527832031, + "learning_rate": 5e-06, + "loss": 0.9416, + "num_input_tokens_seen": 445728248, + "step": 3529 + }, + { + "epoch": 0.9052780093631758, + "loss": 1.0316959619522095, + "loss_ce": 0.0009342351695522666, + "loss_iou": 0.470703125, + "loss_num": 0.0181884765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 445728248, + "step": 3529 + }, + { + "epoch": 0.9055345347271212, + "grad_norm": 35.72885513305664, + "learning_rate": 5e-06, + "loss": 1.0237, + "num_input_tokens_seen": 445854940, + "step": 3530 + }, + { + "epoch": 0.9055345347271212, + "loss": 0.9220623970031738, + "loss_ce": 0.00018737863865680993, + "loss_iou": 0.43359375, + "loss_num": 0.01104736328125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 445854940, + "step": 3530 + }, + { + "epoch": 0.9057910600910665, + "grad_norm": 25.30722427368164, + "learning_rate": 5e-06, + "loss": 0.936, + "num_input_tokens_seen": 445981600, + "step": 3531 + }, + { + "epoch": 0.9057910600910665, + "loss": 0.9216327667236328, + "loss_ce": 0.0017109165200963616, + "loss_iou": 0.4296875, + "loss_num": 0.01202392578125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 445981600, + "step": 3531 + }, + { + "epoch": 0.9060475854550119, + "grad_norm": 28.16413116455078, + "learning_rate": 5e-06, + "loss": 0.7903, + "num_input_tokens_seen": 446107548, + "step": 3532 + }, + { + "epoch": 0.9060475854550119, + "loss": 0.7405537366867065, + "loss_ce": 0.001295936875976622, + "loss_iou": 0.341796875, + "loss_num": 0.01123046875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 446107548, + "step": 3532 + }, + { + "epoch": 0.9063041108189572, + "grad_norm": 32.50393295288086, + "learning_rate": 5e-06, + "loss": 0.7892, + "num_input_tokens_seen": 446233924, + "step": 3533 + }, + { + "epoch": 0.9063041108189572, + "loss": 0.773316502571106, + "loss_ce": 0.0013438657624647021, + "loss_iou": 0.361328125, + "loss_num": 0.01019287109375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 446233924, + "step": 3533 + }, + { + "epoch": 0.9065606361829026, + "grad_norm": 37.29848861694336, + "learning_rate": 5e-06, + "loss": 0.8753, + "num_input_tokens_seen": 446359204, + "step": 3534 + }, + { + "epoch": 0.9065606361829026, + "loss": 1.0286892652511597, + "loss_ce": 0.0028103869408369064, + "loss_iou": 0.4765625, + "loss_num": 0.0147705078125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 446359204, + "step": 3534 + }, + { + "epoch": 0.906817161546848, + "grad_norm": 29.306903839111328, + "learning_rate": 5e-06, + "loss": 0.9663, + "num_input_tokens_seen": 446486140, + "step": 3535 + }, + { + "epoch": 0.906817161546848, + "loss": 0.9763796329498291, + "loss_ce": 0.0017702667973935604, + "loss_iou": 0.443359375, + "loss_num": 0.017822265625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 446486140, + "step": 3535 + }, + { + "epoch": 0.9070736869107933, + "grad_norm": 29.029014587402344, + "learning_rate": 5e-06, + "loss": 0.9751, + "num_input_tokens_seen": 446612376, + "step": 3536 + }, + { + "epoch": 0.9070736869107933, + "loss": 0.7437125444412231, + "loss_ce": 0.0005485069705173373, + "loss_iou": 0.34375, + "loss_num": 0.0107421875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 446612376, + "step": 3536 + }, + { + "epoch": 0.9073302122747386, + "grad_norm": 17.07200813293457, + "learning_rate": 5e-06, + "loss": 0.8208, + "num_input_tokens_seen": 446737408, + "step": 3537 + }, + { + "epoch": 0.9073302122747386, + "loss": 0.6629765033721924, + "loss_ce": 0.001111278892494738, + "loss_iou": 0.306640625, + "loss_num": 0.009521484375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 446737408, + "step": 3537 + }, + { + "epoch": 0.907586737638684, + "grad_norm": 24.991750717163086, + "learning_rate": 5e-06, + "loss": 0.7943, + "num_input_tokens_seen": 446863392, + "step": 3538 + }, + { + "epoch": 0.907586737638684, + "loss": 0.8355567455291748, + "loss_ce": 0.008408309891819954, + "loss_iou": 0.37109375, + "loss_num": 0.016845703125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 446863392, + "step": 3538 + }, + { + "epoch": 0.9078432630026294, + "grad_norm": 20.243528366088867, + "learning_rate": 5e-06, + "loss": 0.8369, + "num_input_tokens_seen": 446989348, + "step": 3539 + }, + { + "epoch": 0.9078432630026294, + "loss": 0.7361337542533875, + "loss_ce": 0.0022469796240329742, + "loss_iou": 0.35546875, + "loss_num": 0.004241943359375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 446989348, + "step": 3539 + }, + { + "epoch": 0.9080997883665748, + "grad_norm": 33.501129150390625, + "learning_rate": 5e-06, + "loss": 0.9269, + "num_input_tokens_seen": 447116184, + "step": 3540 + }, + { + "epoch": 0.9080997883665748, + "loss": 0.7837704420089722, + "loss_ce": 0.0008114438387565315, + "loss_iou": 0.3515625, + "loss_num": 0.015625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 447116184, + "step": 3540 + }, + { + "epoch": 0.9083563137305201, + "grad_norm": 43.51060104370117, + "learning_rate": 5e-06, + "loss": 0.8697, + "num_input_tokens_seen": 447241240, + "step": 3541 + }, + { + "epoch": 0.9083563137305201, + "loss": 0.9702218770980835, + "loss_ce": 0.001960119465366006, + "loss_iou": 0.4296875, + "loss_num": 0.021484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 447241240, + "step": 3541 + }, + { + "epoch": 0.9086128390944654, + "grad_norm": 55.73686599731445, + "learning_rate": 5e-06, + "loss": 0.8724, + "num_input_tokens_seen": 447367776, + "step": 3542 + }, + { + "epoch": 0.9086128390944654, + "loss": 0.7337223291397095, + "loss_ce": 0.0008121485006995499, + "loss_iou": 0.349609375, + "loss_num": 0.00665283203125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 447367776, + "step": 3542 + }, + { + "epoch": 0.9088693644584108, + "grad_norm": 54.588218688964844, + "learning_rate": 5e-06, + "loss": 0.8397, + "num_input_tokens_seen": 447494868, + "step": 3543 + }, + { + "epoch": 0.9088693644584108, + "loss": 0.9064417481422424, + "loss_ce": 0.0011683020275086164, + "loss_iou": 0.4296875, + "loss_num": 0.00927734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 447494868, + "step": 3543 + }, + { + "epoch": 0.9091258898223562, + "grad_norm": 69.662109375, + "learning_rate": 5e-06, + "loss": 0.9427, + "num_input_tokens_seen": 447621452, + "step": 3544 + }, + { + "epoch": 0.9091258898223562, + "loss": 0.9093494415283203, + "loss_ce": 0.0021228936966508627, + "loss_iou": 0.42578125, + "loss_num": 0.0115966796875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 447621452, + "step": 3544 + }, + { + "epoch": 0.9093824151863016, + "grad_norm": 45.868526458740234, + "learning_rate": 5e-06, + "loss": 1.0042, + "num_input_tokens_seen": 447747044, + "step": 3545 + }, + { + "epoch": 0.9093824151863016, + "loss": 0.877912700176239, + "loss_ce": 0.0009595580631867051, + "loss_iou": 0.421875, + "loss_num": 0.006561279296875, + "loss_xval": 0.875, + "num_input_tokens_seen": 447747044, + "step": 3545 + }, + { + "epoch": 0.9096389405502469, + "grad_norm": 31.22001075744629, + "learning_rate": 5e-06, + "loss": 0.9164, + "num_input_tokens_seen": 447872636, + "step": 3546 + }, + { + "epoch": 0.9096389405502469, + "loss": 0.9559316635131836, + "loss_ce": 0.00305080134421587, + "loss_iou": 0.41796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 447872636, + "step": 3546 + }, + { + "epoch": 0.9098954659141922, + "grad_norm": 31.076433181762695, + "learning_rate": 5e-06, + "loss": 0.859, + "num_input_tokens_seen": 447998564, + "step": 3547 + }, + { + "epoch": 0.9098954659141922, + "loss": 0.8893541097640991, + "loss_ce": 0.0006822406430728734, + "loss_iou": 0.419921875, + "loss_num": 0.00994873046875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 447998564, + "step": 3547 + }, + { + "epoch": 0.9101519912781376, + "grad_norm": 32.137046813964844, + "learning_rate": 5e-06, + "loss": 0.861, + "num_input_tokens_seen": 448125348, + "step": 3548 + }, + { + "epoch": 0.9101519912781376, + "loss": 0.7507063150405884, + "loss_ce": 0.0009504628833383322, + "loss_iou": 0.34765625, + "loss_num": 0.0111083984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 448125348, + "step": 3548 + }, + { + "epoch": 0.910408516642083, + "grad_norm": 29.698827743530273, + "learning_rate": 5e-06, + "loss": 0.8532, + "num_input_tokens_seen": 448251052, + "step": 3549 + }, + { + "epoch": 0.910408516642083, + "loss": 0.7895252704620361, + "loss_ce": 0.0009510432719253004, + "loss_iou": 0.37890625, + "loss_num": 0.006439208984375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 448251052, + "step": 3549 + }, + { + "epoch": 0.9106650420060284, + "grad_norm": 25.09073829650879, + "learning_rate": 5e-06, + "loss": 1.02, + "num_input_tokens_seen": 448376396, + "step": 3550 + }, + { + "epoch": 0.9106650420060284, + "loss": 0.967451810836792, + "loss_ce": 0.002852232661098242, + "loss_iou": 0.447265625, + "loss_num": 0.01416015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 448376396, + "step": 3550 + }, + { + "epoch": 0.9109215673699738, + "grad_norm": 30.87039566040039, + "learning_rate": 5e-06, + "loss": 0.912, + "num_input_tokens_seen": 448503012, + "step": 3551 + }, + { + "epoch": 0.9109215673699738, + "loss": 0.9961769580841064, + "loss_ce": 0.0005714566214010119, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 448503012, + "step": 3551 + }, + { + "epoch": 0.911178092733919, + "grad_norm": 47.42852783203125, + "learning_rate": 5e-06, + "loss": 0.8658, + "num_input_tokens_seen": 448628396, + "step": 3552 + }, + { + "epoch": 0.911178092733919, + "loss": 0.8807763457298279, + "loss_ce": 0.00040522898780182004, + "loss_iou": 0.412109375, + "loss_num": 0.01165771484375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 448628396, + "step": 3552 + }, + { + "epoch": 0.9114346180978644, + "grad_norm": 46.73428726196289, + "learning_rate": 5e-06, + "loss": 0.9065, + "num_input_tokens_seen": 448755092, + "step": 3553 + }, + { + "epoch": 0.9114346180978644, + "loss": 0.9566335678100586, + "loss_ce": 9.056551789399236e-05, + "loss_iou": 0.453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 448755092, + "step": 3553 + }, + { + "epoch": 0.9116911434618098, + "grad_norm": 41.932430267333984, + "learning_rate": 5e-06, + "loss": 0.893, + "num_input_tokens_seen": 448882040, + "step": 3554 + }, + { + "epoch": 0.9116911434618098, + "loss": 0.7321056723594666, + "loss_ce": 0.00017206420307047665, + "loss_iou": 0.341796875, + "loss_num": 0.009765625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 448882040, + "step": 3554 + }, + { + "epoch": 0.9119476688257552, + "grad_norm": 52.546939849853516, + "learning_rate": 5e-06, + "loss": 0.8637, + "num_input_tokens_seen": 449009772, + "step": 3555 + }, + { + "epoch": 0.9119476688257552, + "loss": 0.8133898377418518, + "loss_ce": 0.0006457075942307711, + "loss_iou": 0.384765625, + "loss_num": 0.0081787109375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 449009772, + "step": 3555 + }, + { + "epoch": 0.9122041941897006, + "grad_norm": 60.80083465576172, + "learning_rate": 5e-06, + "loss": 0.9484, + "num_input_tokens_seen": 449136108, + "step": 3556 + }, + { + "epoch": 0.9122041941897006, + "loss": 0.7951382398605347, + "loss_ce": 0.0007046492537483573, + "loss_iou": 0.369140625, + "loss_num": 0.01123046875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 449136108, + "step": 3556 + }, + { + "epoch": 0.9124607195536458, + "grad_norm": 46.38669204711914, + "learning_rate": 5e-06, + "loss": 0.9402, + "num_input_tokens_seen": 449263896, + "step": 3557 + }, + { + "epoch": 0.9124607195536458, + "loss": 0.962466835975647, + "loss_ce": 0.0010409834794700146, + "loss_iou": 0.4375, + "loss_num": 0.017333984375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 449263896, + "step": 3557 + }, + { + "epoch": 0.9127172449175912, + "grad_norm": 32.94407272338867, + "learning_rate": 5e-06, + "loss": 0.7165, + "num_input_tokens_seen": 449389812, + "step": 3558 + }, + { + "epoch": 0.9127172449175912, + "loss": 0.7186746597290039, + "loss_ce": 0.0009012097143568099, + "loss_iou": 0.341796875, + "loss_num": 0.007080078125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 449389812, + "step": 3558 + }, + { + "epoch": 0.9129737702815366, + "grad_norm": 38.93385696411133, + "learning_rate": 5e-06, + "loss": 0.9362, + "num_input_tokens_seen": 449516076, + "step": 3559 + }, + { + "epoch": 0.9129737702815366, + "loss": 0.7839512228965759, + "loss_ce": 0.00050395104335621, + "loss_iou": 0.3671875, + "loss_num": 0.01019287109375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 449516076, + "step": 3559 + }, + { + "epoch": 0.913230295645482, + "grad_norm": 39.94411087036133, + "learning_rate": 5e-06, + "loss": 0.9397, + "num_input_tokens_seen": 449642108, + "step": 3560 + }, + { + "epoch": 0.913230295645482, + "loss": 0.9796422719955444, + "loss_ce": 0.0006384333246387541, + "loss_iou": 0.455078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 449642108, + "step": 3560 + }, + { + "epoch": 0.9134868210094274, + "grad_norm": 66.95171356201172, + "learning_rate": 5e-06, + "loss": 1.0358, + "num_input_tokens_seen": 449768256, + "step": 3561 + }, + { + "epoch": 0.9134868210094274, + "loss": 0.8995403051376343, + "loss_ce": 0.004032548982650042, + "loss_iou": 0.408203125, + "loss_num": 0.0157470703125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 449768256, + "step": 3561 + }, + { + "epoch": 0.9137433463733726, + "grad_norm": 60.34748458862305, + "learning_rate": 5e-06, + "loss": 0.9017, + "num_input_tokens_seen": 449895892, + "step": 3562 + }, + { + "epoch": 0.9137433463733726, + "loss": 0.760934591293335, + "loss_ce": 0.00043656484922394156, + "loss_iou": 0.361328125, + "loss_num": 0.00762939453125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 449895892, + "step": 3562 + }, + { + "epoch": 0.913999871737318, + "grad_norm": 51.024227142333984, + "learning_rate": 5e-06, + "loss": 0.9594, + "num_input_tokens_seen": 450023040, + "step": 3563 + }, + { + "epoch": 0.913999871737318, + "loss": 1.0174822807312012, + "loss_ce": 0.0028338762931525707, + "loss_iou": 0.451171875, + "loss_num": 0.022705078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 450023040, + "step": 3563 + }, + { + "epoch": 0.9142563971012634, + "grad_norm": 23.56356430053711, + "learning_rate": 5e-06, + "loss": 0.7902, + "num_input_tokens_seen": 450151016, + "step": 3564 + }, + { + "epoch": 0.9142563971012634, + "loss": 0.7439143657684326, + "loss_ce": 0.0005061838892288506, + "loss_iou": 0.35546875, + "loss_num": 0.0067138671875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 450151016, + "step": 3564 + }, + { + "epoch": 0.9145129224652088, + "grad_norm": 38.1552619934082, + "learning_rate": 5e-06, + "loss": 0.9373, + "num_input_tokens_seen": 450277132, + "step": 3565 + }, + { + "epoch": 0.9145129224652088, + "loss": 0.7573054432868958, + "loss_ce": 0.00046950666001066566, + "loss_iou": 0.359375, + "loss_num": 0.00738525390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 450277132, + "step": 3565 + }, + { + "epoch": 0.9147694478291541, + "grad_norm": 38.852298736572266, + "learning_rate": 5e-06, + "loss": 0.9107, + "num_input_tokens_seen": 450403884, + "step": 3566 + }, + { + "epoch": 0.9147694478291541, + "loss": 0.8297868371009827, + "loss_ce": 0.00019696576055139303, + "loss_iou": 0.390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 450403884, + "step": 3566 + }, + { + "epoch": 0.9150259731930994, + "grad_norm": 46.44561767578125, + "learning_rate": 5e-06, + "loss": 0.9108, + "num_input_tokens_seen": 450530036, + "step": 3567 + }, + { + "epoch": 0.9150259731930994, + "loss": 0.910895824432373, + "loss_ce": 0.003913380671292543, + "loss_iou": 0.404296875, + "loss_num": 0.0194091796875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 450530036, + "step": 3567 + }, + { + "epoch": 0.9152824985570448, + "grad_norm": 36.37361145019531, + "learning_rate": 5e-06, + "loss": 0.9989, + "num_input_tokens_seen": 450656160, + "step": 3568 + }, + { + "epoch": 0.9152824985570448, + "loss": 1.080442190170288, + "loss_ce": 0.00036418650415726006, + "loss_iou": 0.49609375, + "loss_num": 0.017333984375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 450656160, + "step": 3568 + }, + { + "epoch": 0.9155390239209902, + "grad_norm": 45.88718795776367, + "learning_rate": 5e-06, + "loss": 0.7905, + "num_input_tokens_seen": 450783064, + "step": 3569 + }, + { + "epoch": 0.9155390239209902, + "loss": 0.9332125782966614, + "loss_ce": 0.003036813111975789, + "loss_iou": 0.423828125, + "loss_num": 0.0162353515625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 450783064, + "step": 3569 + }, + { + "epoch": 0.9157955492849356, + "grad_norm": 54.20616149902344, + "learning_rate": 5e-06, + "loss": 0.9388, + "num_input_tokens_seen": 450909132, + "step": 3570 + }, + { + "epoch": 0.9157955492849356, + "loss": 0.7763097286224365, + "loss_ce": 0.0028722588904201984, + "loss_iou": 0.36328125, + "loss_num": 0.00897216796875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 450909132, + "step": 3570 + }, + { + "epoch": 0.9160520746488809, + "grad_norm": 56.94639205932617, + "learning_rate": 5e-06, + "loss": 0.8622, + "num_input_tokens_seen": 451035772, + "step": 3571 + }, + { + "epoch": 0.9160520746488809, + "loss": 0.7380505204200745, + "loss_ce": 0.0024548314977437258, + "loss_iou": 0.341796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 451035772, + "step": 3571 + }, + { + "epoch": 0.9163086000128262, + "grad_norm": 44.200870513916016, + "learning_rate": 5e-06, + "loss": 0.9807, + "num_input_tokens_seen": 451162696, + "step": 3572 + }, + { + "epoch": 0.9163086000128262, + "loss": 1.127568006515503, + "loss_ce": 0.002079758094623685, + "loss_iou": 0.53125, + "loss_num": 0.012451171875, + "loss_xval": 1.125, + "num_input_tokens_seen": 451162696, + "step": 3572 + }, + { + "epoch": 0.9165651253767716, + "grad_norm": 78.45826721191406, + "learning_rate": 5e-06, + "loss": 0.8828, + "num_input_tokens_seen": 451288544, + "step": 3573 + }, + { + "epoch": 0.9165651253767716, + "loss": 0.7745845317840576, + "loss_ce": 0.0031001700554043055, + "loss_iou": 0.349609375, + "loss_num": 0.013916015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 451288544, + "step": 3573 + }, + { + "epoch": 0.916821650740717, + "grad_norm": 374.9405822753906, + "learning_rate": 5e-06, + "loss": 0.9584, + "num_input_tokens_seen": 451416008, + "step": 3574 + }, + { + "epoch": 0.916821650740717, + "loss": 1.0144587755203247, + "loss_ce": 0.003716591279953718, + "loss_iou": 0.455078125, + "loss_num": 0.0201416015625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 451416008, + "step": 3574 + }, + { + "epoch": 0.9170781761046624, + "grad_norm": 54.926761627197266, + "learning_rate": 5e-06, + "loss": 0.9177, + "num_input_tokens_seen": 451542784, + "step": 3575 + }, + { + "epoch": 0.9170781761046624, + "loss": 0.9802862405776978, + "loss_ce": 0.0007940400973893702, + "loss_iou": 0.44921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 451542784, + "step": 3575 + }, + { + "epoch": 0.9173347014686077, + "grad_norm": 55.24159622192383, + "learning_rate": 5e-06, + "loss": 0.9116, + "num_input_tokens_seen": 451668940, + "step": 3576 + }, + { + "epoch": 0.9173347014686077, + "loss": 0.7721350193023682, + "loss_ce": 0.0006505917990580201, + "loss_iou": 0.369140625, + "loss_num": 0.0068359375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 451668940, + "step": 3576 + }, + { + "epoch": 0.9175912268325531, + "grad_norm": 55.0659065246582, + "learning_rate": 5e-06, + "loss": 0.914, + "num_input_tokens_seen": 451795444, + "step": 3577 + }, + { + "epoch": 0.9175912268325531, + "loss": 0.8302195072174072, + "loss_ce": 0.0016062329523265362, + "loss_iou": 0.3671875, + "loss_num": 0.01904296875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 451795444, + "step": 3577 + }, + { + "epoch": 0.9178477521964984, + "grad_norm": 39.24929428100586, + "learning_rate": 5e-06, + "loss": 0.9625, + "num_input_tokens_seen": 451920896, + "step": 3578 + }, + { + "epoch": 0.9178477521964984, + "loss": 0.9146767258644104, + "loss_ce": 0.002811483573168516, + "loss_iou": 0.419921875, + "loss_num": 0.01458740234375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 451920896, + "step": 3578 + }, + { + "epoch": 0.9181042775604438, + "grad_norm": 25.633136749267578, + "learning_rate": 5e-06, + "loss": 0.8703, + "num_input_tokens_seen": 452047252, + "step": 3579 + }, + { + "epoch": 0.9181042775604438, + "loss": 0.7458038330078125, + "loss_ce": 0.00044251521467231214, + "loss_iou": 0.3515625, + "loss_num": 0.008544921875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 452047252, + "step": 3579 + }, + { + "epoch": 0.9183608029243892, + "grad_norm": 27.30088233947754, + "learning_rate": 5e-06, + "loss": 0.8912, + "num_input_tokens_seen": 452174380, + "step": 3580 + }, + { + "epoch": 0.9183608029243892, + "loss": 0.9592449069023132, + "loss_ce": 0.0007487643742933869, + "loss_iou": 0.4453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 452174380, + "step": 3580 + }, + { + "epoch": 0.9186173282883345, + "grad_norm": 32.88852310180664, + "learning_rate": 5e-06, + "loss": 0.8588, + "num_input_tokens_seen": 452301304, + "step": 3581 + }, + { + "epoch": 0.9186173282883345, + "loss": 1.1653746366500854, + "loss_ce": 0.000823814538307488, + "loss_iou": 0.5234375, + "loss_num": 0.0240478515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 452301304, + "step": 3581 + }, + { + "epoch": 0.9188738536522799, + "grad_norm": 46.7718620300293, + "learning_rate": 5e-06, + "loss": 0.8391, + "num_input_tokens_seen": 452428916, + "step": 3582 + }, + { + "epoch": 0.9188738536522799, + "loss": 0.8042782545089722, + "loss_ce": 0.0005673380801454186, + "loss_iou": 0.37890625, + "loss_num": 0.00909423828125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 452428916, + "step": 3582 + }, + { + "epoch": 0.9191303790162252, + "grad_norm": 34.7113037109375, + "learning_rate": 5e-06, + "loss": 0.8511, + "num_input_tokens_seen": 452553180, + "step": 3583 + }, + { + "epoch": 0.9191303790162252, + "loss": 0.8503733277320862, + "loss_ce": 0.001740533858537674, + "loss_iou": 0.396484375, + "loss_num": 0.01092529296875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 452553180, + "step": 3583 + }, + { + "epoch": 0.9193869043801706, + "grad_norm": 23.291885375976562, + "learning_rate": 5e-06, + "loss": 0.8714, + "num_input_tokens_seen": 452680548, + "step": 3584 + }, + { + "epoch": 0.9193869043801706, + "loss": 0.9060357809066772, + "loss_ce": 0.001250644912943244, + "loss_iou": 0.42578125, + "loss_num": 0.01043701171875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 452680548, + "step": 3584 + }, + { + "epoch": 0.919643429744116, + "grad_norm": 29.620586395263672, + "learning_rate": 5e-06, + "loss": 0.8036, + "num_input_tokens_seen": 452807456, + "step": 3585 + }, + { + "epoch": 0.919643429744116, + "loss": 0.7334659695625305, + "loss_ce": 0.001532346592284739, + "loss_iou": 0.3515625, + "loss_num": 0.00616455078125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 452807456, + "step": 3585 + }, + { + "epoch": 0.9198999551080613, + "grad_norm": 57.58934783935547, + "learning_rate": 5e-06, + "loss": 0.8835, + "num_input_tokens_seen": 452932564, + "step": 3586 + }, + { + "epoch": 0.9198999551080613, + "loss": 0.8320668935775757, + "loss_ce": 0.004918421618640423, + "loss_iou": 0.376953125, + "loss_num": 0.0145263671875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 452932564, + "step": 3586 + }, + { + "epoch": 0.9201564804720067, + "grad_norm": 52.12625503540039, + "learning_rate": 5e-06, + "loss": 0.8993, + "num_input_tokens_seen": 453058740, + "step": 3587 + }, + { + "epoch": 0.9201564804720067, + "loss": 0.8211240768432617, + "loss_ce": 0.0032529851887375116, + "loss_iou": 0.37890625, + "loss_num": 0.01214599609375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 453058740, + "step": 3587 + }, + { + "epoch": 0.920413005835952, + "grad_norm": 61.159114837646484, + "learning_rate": 5e-06, + "loss": 0.9127, + "num_input_tokens_seen": 453186036, + "step": 3588 + }, + { + "epoch": 0.920413005835952, + "loss": 1.0000274181365967, + "loss_ce": 0.0005156383849680424, + "loss_iou": 0.4609375, + "loss_num": 0.0152587890625, + "loss_xval": 1.0, + "num_input_tokens_seen": 453186036, + "step": 3588 + }, + { + "epoch": 0.9206695311998974, + "grad_norm": 46.093658447265625, + "learning_rate": 5e-06, + "loss": 1.0092, + "num_input_tokens_seen": 453311944, + "step": 3589 + }, + { + "epoch": 0.9206695311998974, + "loss": 0.9159447550773621, + "loss_ce": 0.00041744066402316093, + "loss_iou": 0.4375, + "loss_num": 0.00823974609375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 453311944, + "step": 3589 + }, + { + "epoch": 0.9209260565638427, + "grad_norm": 16.026203155517578, + "learning_rate": 5e-06, + "loss": 0.9792, + "num_input_tokens_seen": 453437612, + "step": 3590 + }, + { + "epoch": 0.9209260565638427, + "loss": 0.9551845788955688, + "loss_ce": 0.0030361248645931482, + "loss_iou": 0.427734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 453437612, + "step": 3590 + }, + { + "epoch": 0.9211825819277881, + "grad_norm": 24.938465118408203, + "learning_rate": 5e-06, + "loss": 0.8859, + "num_input_tokens_seen": 453563296, + "step": 3591 + }, + { + "epoch": 0.9211825819277881, + "loss": 0.7368862628936768, + "loss_ce": 0.0020230235531926155, + "loss_iou": 0.345703125, + "loss_num": 0.00860595703125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 453563296, + "step": 3591 + }, + { + "epoch": 0.9214391072917335, + "grad_norm": 39.061153411865234, + "learning_rate": 5e-06, + "loss": 0.8627, + "num_input_tokens_seen": 453689440, + "step": 3592 + }, + { + "epoch": 0.9214391072917335, + "loss": 0.8733949661254883, + "loss_ce": 0.0023012193851172924, + "loss_iou": 0.41796875, + "loss_num": 0.00732421875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 453689440, + "step": 3592 + }, + { + "epoch": 0.9216956326556788, + "grad_norm": 60.24197006225586, + "learning_rate": 5e-06, + "loss": 0.944, + "num_input_tokens_seen": 453816548, + "step": 3593 + }, + { + "epoch": 0.9216956326556788, + "loss": 0.9968253374099731, + "loss_ce": 0.0021964095067232847, + "loss_iou": 0.453125, + "loss_num": 0.017578125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 453816548, + "step": 3593 + }, + { + "epoch": 0.9219521580196242, + "grad_norm": 67.1270751953125, + "learning_rate": 5e-06, + "loss": 1.0212, + "num_input_tokens_seen": 453942516, + "step": 3594 + }, + { + "epoch": 0.9219521580196242, + "loss": 1.1927027702331543, + "loss_ce": 0.0012964674970135093, + "loss_iou": 0.5234375, + "loss_num": 0.0284423828125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 453942516, + "step": 3594 + }, + { + "epoch": 0.9222086833835695, + "grad_norm": 46.8626823425293, + "learning_rate": 5e-06, + "loss": 0.908, + "num_input_tokens_seen": 454068868, + "step": 3595 + }, + { + "epoch": 0.9222086833835695, + "loss": 0.8053247928619385, + "loss_ce": 0.00014900733367539942, + "loss_iou": 0.3828125, + "loss_num": 0.0081787109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 454068868, + "step": 3595 + }, + { + "epoch": 0.9224652087475149, + "grad_norm": 33.43415832519531, + "learning_rate": 5e-06, + "loss": 0.8109, + "num_input_tokens_seen": 454194348, + "step": 3596 + }, + { + "epoch": 0.9224652087475149, + "loss": 0.7763883471488953, + "loss_ce": 0.0005094447988085449, + "loss_iou": 0.375, + "loss_num": 0.00555419921875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 454194348, + "step": 3596 + }, + { + "epoch": 0.9227217341114603, + "grad_norm": 33.79922866821289, + "learning_rate": 5e-06, + "loss": 0.7817, + "num_input_tokens_seen": 454320176, + "step": 3597 + }, + { + "epoch": 0.9227217341114603, + "loss": 0.8497979640960693, + "loss_ce": 0.0001886047248262912, + "loss_iou": 0.38671875, + "loss_num": 0.01513671875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 454320176, + "step": 3597 + }, + { + "epoch": 0.9229782594754057, + "grad_norm": 32.944217681884766, + "learning_rate": 5e-06, + "loss": 0.9339, + "num_input_tokens_seen": 454445352, + "step": 3598 + }, + { + "epoch": 0.9229782594754057, + "loss": 0.923587441444397, + "loss_ce": 0.001712408266030252, + "loss_iou": 0.41796875, + "loss_num": 0.0172119140625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 454445352, + "step": 3598 + }, + { + "epoch": 0.923234784839351, + "grad_norm": 31.126129150390625, + "learning_rate": 5e-06, + "loss": 0.8728, + "num_input_tokens_seen": 454572012, + "step": 3599 + }, + { + "epoch": 0.923234784839351, + "loss": 1.0917811393737793, + "loss_ce": 0.0019373171962797642, + "loss_iou": 0.5078125, + "loss_num": 0.0146484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 454572012, + "step": 3599 + }, + { + "epoch": 0.9234913102032963, + "grad_norm": 27.537437438964844, + "learning_rate": 5e-06, + "loss": 0.9779, + "num_input_tokens_seen": 454699700, + "step": 3600 + }, + { + "epoch": 0.9234913102032963, + "loss": 0.8416576385498047, + "loss_ce": 0.00010485924576641992, + "loss_iou": 0.390625, + "loss_num": 0.0118408203125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 454699700, + "step": 3600 + }, + { + "epoch": 0.9237478355672417, + "grad_norm": 42.39928436279297, + "learning_rate": 5e-06, + "loss": 0.9024, + "num_input_tokens_seen": 454826896, + "step": 3601 + }, + { + "epoch": 0.9237478355672417, + "loss": 0.9392867684364319, + "loss_ce": 0.000321900995913893, + "loss_iou": 0.43359375, + "loss_num": 0.01409912109375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 454826896, + "step": 3601 + }, + { + "epoch": 0.9240043609311871, + "grad_norm": 62.7063102722168, + "learning_rate": 5e-06, + "loss": 0.8097, + "num_input_tokens_seen": 454952152, + "step": 3602 + }, + { + "epoch": 0.9240043609311871, + "loss": 0.8290905952453613, + "loss_ce": 0.00047733503743074834, + "loss_iou": 0.3828125, + "loss_num": 0.0123291015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 454952152, + "step": 3602 + }, + { + "epoch": 0.9242608862951325, + "grad_norm": 52.665802001953125, + "learning_rate": 5e-06, + "loss": 0.893, + "num_input_tokens_seen": 455078584, + "step": 3603 + }, + { + "epoch": 0.9242608862951325, + "loss": 0.8709479570388794, + "loss_ce": 0.0013190761674195528, + "loss_iou": 0.40625, + "loss_num": 0.01153564453125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 455078584, + "step": 3603 + }, + { + "epoch": 0.9245174116590777, + "grad_norm": 58.03290939331055, + "learning_rate": 5e-06, + "loss": 0.7812, + "num_input_tokens_seen": 455205408, + "step": 3604 + }, + { + "epoch": 0.9245174116590777, + "loss": 0.7831684350967407, + "loss_ce": 0.0009418905829079449, + "loss_iou": 0.357421875, + "loss_num": 0.01336669921875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 455205408, + "step": 3604 + }, + { + "epoch": 0.9247739370230231, + "grad_norm": 45.604888916015625, + "learning_rate": 5e-06, + "loss": 1.0439, + "num_input_tokens_seen": 455331492, + "step": 3605 + }, + { + "epoch": 0.9247739370230231, + "loss": 0.974195122718811, + "loss_ce": 0.0005623459001071751, + "loss_iou": 0.455078125, + "loss_num": 0.01251220703125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 455331492, + "step": 3605 + }, + { + "epoch": 0.9250304623869685, + "grad_norm": 17.472393035888672, + "learning_rate": 5e-06, + "loss": 0.7814, + "num_input_tokens_seen": 455457104, + "step": 3606 + }, + { + "epoch": 0.9250304623869685, + "loss": 0.8266090750694275, + "loss_ce": 0.0019020545296370983, + "loss_iou": 0.38671875, + "loss_num": 0.01055908203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 455457104, + "step": 3606 + }, + { + "epoch": 0.9252869877509139, + "grad_norm": 22.610918045043945, + "learning_rate": 5e-06, + "loss": 0.9, + "num_input_tokens_seen": 455583380, + "step": 3607 + }, + { + "epoch": 0.9252869877509139, + "loss": 1.0453102588653564, + "loss_ce": 0.0003884239122271538, + "loss_iou": 0.478515625, + "loss_num": 0.0179443359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 455583380, + "step": 3607 + }, + { + "epoch": 0.9255435131148593, + "grad_norm": 28.802215576171875, + "learning_rate": 5e-06, + "loss": 0.8824, + "num_input_tokens_seen": 455709616, + "step": 3608 + }, + { + "epoch": 0.9255435131148593, + "loss": 0.7435708045959473, + "loss_ce": 0.0001626167504582554, + "loss_iou": 0.35546875, + "loss_num": 0.0064697265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 455709616, + "step": 3608 + }, + { + "epoch": 0.9258000384788045, + "grad_norm": 32.86478042602539, + "learning_rate": 5e-06, + "loss": 0.9994, + "num_input_tokens_seen": 455834816, + "step": 3609 + }, + { + "epoch": 0.9258000384788045, + "loss": 0.9032742381095886, + "loss_ce": 0.0033718880731612444, + "loss_iou": 0.4296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 455834816, + "step": 3609 + }, + { + "epoch": 0.9260565638427499, + "grad_norm": 46.2650146484375, + "learning_rate": 5e-06, + "loss": 0.9291, + "num_input_tokens_seen": 455961152, + "step": 3610 + }, + { + "epoch": 0.9260565638427499, + "loss": 0.9017542600631714, + "loss_ce": 0.002340232487767935, + "loss_iou": 0.408203125, + "loss_num": 0.0166015625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 455961152, + "step": 3610 + }, + { + "epoch": 0.9263130892066953, + "grad_norm": 48.566314697265625, + "learning_rate": 5e-06, + "loss": 1.0094, + "num_input_tokens_seen": 456087856, + "step": 3611 + }, + { + "epoch": 0.9263130892066953, + "loss": 1.0985465049743652, + "loss_ce": 0.0008902645204216242, + "loss_iou": 0.498046875, + "loss_num": 0.0203857421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 456087856, + "step": 3611 + }, + { + "epoch": 0.9265696145706407, + "grad_norm": 52.738258361816406, + "learning_rate": 5e-06, + "loss": 0.9319, + "num_input_tokens_seen": 456214648, + "step": 3612 + }, + { + "epoch": 0.9265696145706407, + "loss": 1.1087466478347778, + "loss_ce": 0.00034821435110643506, + "loss_iou": 0.498046875, + "loss_num": 0.0224609375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 456214648, + "step": 3612 + }, + { + "epoch": 0.9268261399345861, + "grad_norm": 87.74256134033203, + "learning_rate": 5e-06, + "loss": 1.0374, + "num_input_tokens_seen": 456340876, + "step": 3613 + }, + { + "epoch": 0.9268261399345861, + "loss": 1.212399959564209, + "loss_ce": 0.002927233465015888, + "loss_iou": 0.55859375, + "loss_num": 0.0184326171875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 456340876, + "step": 3613 + }, + { + "epoch": 0.9270826652985313, + "grad_norm": 34.726375579833984, + "learning_rate": 5e-06, + "loss": 0.9775, + "num_input_tokens_seen": 456466792, + "step": 3614 + }, + { + "epoch": 0.9270826652985313, + "loss": 1.0891778469085693, + "loss_ce": 0.003240433521568775, + "loss_iou": 0.48828125, + "loss_num": 0.0216064453125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 456466792, + "step": 3614 + }, + { + "epoch": 0.9273391906624767, + "grad_norm": 42.018211364746094, + "learning_rate": 5e-06, + "loss": 0.9414, + "num_input_tokens_seen": 456592720, + "step": 3615 + }, + { + "epoch": 0.9273391906624767, + "loss": 0.7270696759223938, + "loss_ce": 0.0009954444831237197, + "loss_iou": 0.345703125, + "loss_num": 0.00689697265625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 456592720, + "step": 3615 + }, + { + "epoch": 0.9275957160264221, + "grad_norm": 51.56932830810547, + "learning_rate": 5e-06, + "loss": 0.8751, + "num_input_tokens_seen": 456718344, + "step": 3616 + }, + { + "epoch": 0.9275957160264221, + "loss": 0.9201453924179077, + "loss_ce": 0.00022348684433382004, + "loss_iou": 0.431640625, + "loss_num": 0.01141357421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 456718344, + "step": 3616 + }, + { + "epoch": 0.9278522413903675, + "grad_norm": 50.86412048339844, + "learning_rate": 5e-06, + "loss": 1.082, + "num_input_tokens_seen": 456844544, + "step": 3617 + }, + { + "epoch": 0.9278522413903675, + "loss": 1.0754153728485107, + "loss_ce": 0.001684975577518344, + "loss_iou": 0.48828125, + "loss_num": 0.01904296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 456844544, + "step": 3617 + }, + { + "epoch": 0.9281087667543129, + "grad_norm": 42.509864807128906, + "learning_rate": 5e-06, + "loss": 0.9267, + "num_input_tokens_seen": 456969720, + "step": 3618 + }, + { + "epoch": 0.9281087667543129, + "loss": 0.9366359710693359, + "loss_ce": 0.00255396100692451, + "loss_iou": 0.4375, + "loss_num": 0.011962890625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 456969720, + "step": 3618 + }, + { + "epoch": 0.9283652921182582, + "grad_norm": 56.220130920410156, + "learning_rate": 5e-06, + "loss": 1.0288, + "num_input_tokens_seen": 457096896, + "step": 3619 + }, + { + "epoch": 0.9283652921182582, + "loss": 1.1990458965301514, + "loss_ce": 0.00031540400232188404, + "loss_iou": 0.5390625, + "loss_num": 0.02490234375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 457096896, + "step": 3619 + }, + { + "epoch": 0.9286218174822035, + "grad_norm": 69.47427368164062, + "learning_rate": 5e-06, + "loss": 1.0006, + "num_input_tokens_seen": 457223960, + "step": 3620 + }, + { + "epoch": 0.9286218174822035, + "loss": 1.0552663803100586, + "loss_ce": 0.001067073317244649, + "loss_iou": 0.470703125, + "loss_num": 0.022216796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 457223960, + "step": 3620 + }, + { + "epoch": 0.9288783428461489, + "grad_norm": 43.77383041381836, + "learning_rate": 5e-06, + "loss": 0.9502, + "num_input_tokens_seen": 457349692, + "step": 3621 + }, + { + "epoch": 0.9288783428461489, + "loss": 0.9590688943862915, + "loss_ce": 8.446291030850261e-05, + "loss_iou": 0.455078125, + "loss_num": 0.010009765625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 457349692, + "step": 3621 + }, + { + "epoch": 0.9291348682100943, + "grad_norm": 21.029878616333008, + "learning_rate": 5e-06, + "loss": 0.7843, + "num_input_tokens_seen": 457475188, + "step": 3622 + }, + { + "epoch": 0.9291348682100943, + "loss": 0.6746558547019958, + "loss_ce": 9.533100092085078e-05, + "loss_iou": 0.318359375, + "loss_num": 0.007598876953125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 457475188, + "step": 3622 + }, + { + "epoch": 0.9293913935740397, + "grad_norm": 32.0560188293457, + "learning_rate": 5e-06, + "loss": 0.8874, + "num_input_tokens_seen": 457601012, + "step": 3623 + }, + { + "epoch": 0.9293913935740397, + "loss": 0.780167818069458, + "loss_ce": 0.00038270355435088277, + "loss_iou": 0.37109375, + "loss_num": 0.0078125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 457601012, + "step": 3623 + }, + { + "epoch": 0.929647918937985, + "grad_norm": 42.61419677734375, + "learning_rate": 5e-06, + "loss": 0.8395, + "num_input_tokens_seen": 457726752, + "step": 3624 + }, + { + "epoch": 0.929647918937985, + "loss": 0.901187539100647, + "loss_ce": 0.0012851858045905828, + "loss_iou": 0.416015625, + "loss_num": 0.0135498046875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 457726752, + "step": 3624 + }, + { + "epoch": 0.9299044443019303, + "grad_norm": 42.177120208740234, + "learning_rate": 5e-06, + "loss": 0.8281, + "num_input_tokens_seen": 457853292, + "step": 3625 + }, + { + "epoch": 0.9299044443019303, + "loss": 0.7232677936553955, + "loss_ce": 0.005006087943911552, + "loss_iou": 0.337890625, + "loss_num": 0.00860595703125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 457853292, + "step": 3625 + }, + { + "epoch": 0.9301609696658757, + "grad_norm": 48.13968276977539, + "learning_rate": 5e-06, + "loss": 0.8246, + "num_input_tokens_seen": 457978896, + "step": 3626 + }, + { + "epoch": 0.9301609696658757, + "loss": 0.8834838271141052, + "loss_ce": 0.00042718046461232007, + "loss_iou": 0.41796875, + "loss_num": 0.00885009765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 457978896, + "step": 3626 + }, + { + "epoch": 0.9304174950298211, + "grad_norm": 53.01694869995117, + "learning_rate": 5e-06, + "loss": 0.9535, + "num_input_tokens_seen": 458106124, + "step": 3627 + }, + { + "epoch": 0.9304174950298211, + "loss": 0.9416027069091797, + "loss_ce": 0.0001964666589628905, + "loss_iou": 0.4453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 458106124, + "step": 3627 + }, + { + "epoch": 0.9306740203937665, + "grad_norm": 55.440818786621094, + "learning_rate": 5e-06, + "loss": 0.9012, + "num_input_tokens_seen": 458232356, + "step": 3628 + }, + { + "epoch": 0.9306740203937665, + "loss": 0.8334956169128418, + "loss_ce": 0.000487843353766948, + "loss_iou": 0.396484375, + "loss_num": 0.008056640625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 458232356, + "step": 3628 + }, + { + "epoch": 0.9309305457577118, + "grad_norm": 55.91246795654297, + "learning_rate": 5e-06, + "loss": 0.9581, + "num_input_tokens_seen": 458359300, + "step": 3629 + }, + { + "epoch": 0.9309305457577118, + "loss": 1.0234041213989258, + "loss_ce": 0.008755714632570744, + "loss_iou": 0.45703125, + "loss_num": 0.0203857421875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 458359300, + "step": 3629 + }, + { + "epoch": 0.9311870711216571, + "grad_norm": 56.13252639770508, + "learning_rate": 5e-06, + "loss": 0.9764, + "num_input_tokens_seen": 458485020, + "step": 3630 + }, + { + "epoch": 0.9311870711216571, + "loss": 0.9368298053741455, + "loss_ce": 0.000794700812548399, + "loss_iou": 0.4375, + "loss_num": 0.0123291015625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 458485020, + "step": 3630 + }, + { + "epoch": 0.9314435964856025, + "grad_norm": 41.314849853515625, + "learning_rate": 5e-06, + "loss": 0.9195, + "num_input_tokens_seen": 458610780, + "step": 3631 + }, + { + "epoch": 0.9314435964856025, + "loss": 0.8165313005447388, + "loss_ce": 0.0011015902273356915, + "loss_iou": 0.38671875, + "loss_num": 0.00823974609375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 458610780, + "step": 3631 + }, + { + "epoch": 0.9317001218495479, + "grad_norm": 28.087688446044922, + "learning_rate": 5e-06, + "loss": 0.8585, + "num_input_tokens_seen": 458736188, + "step": 3632 + }, + { + "epoch": 0.9317001218495479, + "loss": 0.7565664649009705, + "loss_ce": 0.0011953659122809768, + "loss_iou": 0.3515625, + "loss_num": 0.0098876953125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 458736188, + "step": 3632 + }, + { + "epoch": 0.9319566472134932, + "grad_norm": 45.88303756713867, + "learning_rate": 5e-06, + "loss": 0.9229, + "num_input_tokens_seen": 458863872, + "step": 3633 + }, + { + "epoch": 0.9319566472134932, + "loss": 0.9181811809539795, + "loss_ce": 0.000456543464679271, + "loss_iou": 0.416015625, + "loss_num": 0.017333984375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 458863872, + "step": 3633 + }, + { + "epoch": 0.9322131725774386, + "grad_norm": 42.30138397216797, + "learning_rate": 5e-06, + "loss": 0.8692, + "num_input_tokens_seen": 458989892, + "step": 3634 + }, + { + "epoch": 0.9322131725774386, + "loss": 0.8961564302444458, + "loss_ce": 0.0016251273918896914, + "loss_iou": 0.412109375, + "loss_num": 0.01409912109375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 458989892, + "step": 3634 + }, + { + "epoch": 0.9324696979413839, + "grad_norm": 37.58060836791992, + "learning_rate": 5e-06, + "loss": 0.9469, + "num_input_tokens_seen": 459116316, + "step": 3635 + }, + { + "epoch": 0.9324696979413839, + "loss": 1.11305570602417, + "loss_ce": 0.0022158133797347546, + "loss_iou": 0.4765625, + "loss_num": 0.031494140625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 459116316, + "step": 3635 + }, + { + "epoch": 0.9327262233053293, + "grad_norm": 61.639183044433594, + "learning_rate": 5e-06, + "loss": 0.892, + "num_input_tokens_seen": 459242340, + "step": 3636 + }, + { + "epoch": 0.9327262233053293, + "loss": 0.9611966013908386, + "loss_ce": 0.0007474091253243387, + "loss_iou": 0.4453125, + "loss_num": 0.014404296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 459242340, + "step": 3636 + }, + { + "epoch": 0.9329827486692747, + "grad_norm": 62.089813232421875, + "learning_rate": 5e-06, + "loss": 0.9647, + "num_input_tokens_seen": 459369136, + "step": 3637 + }, + { + "epoch": 0.9329827486692747, + "loss": 1.1087772846221924, + "loss_ce": 0.0037967923562973738, + "loss_iou": 0.5234375, + "loss_num": 0.01177978515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 459369136, + "step": 3637 + }, + { + "epoch": 0.93323927403322, + "grad_norm": 58.67341995239258, + "learning_rate": 5e-06, + "loss": 0.8872, + "num_input_tokens_seen": 459496396, + "step": 3638 + }, + { + "epoch": 0.93323927403322, + "loss": 0.8088112473487854, + "loss_ce": 0.0011940447147935629, + "loss_iou": 0.38671875, + "loss_num": 0.006317138671875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 459496396, + "step": 3638 + }, + { + "epoch": 0.9334957993971654, + "grad_norm": 44.146175384521484, + "learning_rate": 5e-06, + "loss": 1.0315, + "num_input_tokens_seen": 459622532, + "step": 3639 + }, + { + "epoch": 0.9334957993971654, + "loss": 0.9892755746841431, + "loss_ce": 0.0014826415572315454, + "loss_iou": 0.45703125, + "loss_num": 0.01470947265625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 459622532, + "step": 3639 + }, + { + "epoch": 0.9337523247611108, + "grad_norm": 23.908166885375977, + "learning_rate": 5e-06, + "loss": 0.8639, + "num_input_tokens_seen": 459747636, + "step": 3640 + }, + { + "epoch": 0.9337523247611108, + "loss": 0.9046178460121155, + "loss_ce": 0.0044713616371154785, + "loss_iou": 0.41015625, + "loss_num": 0.015869140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 459747636, + "step": 3640 + }, + { + "epoch": 0.9340088501250561, + "grad_norm": 32.32481384277344, + "learning_rate": 5e-06, + "loss": 0.7744, + "num_input_tokens_seen": 459873052, + "step": 3641 + }, + { + "epoch": 0.9340088501250561, + "loss": 0.7062988877296448, + "loss_ce": 0.000976654002442956, + "loss_iou": 0.337890625, + "loss_num": 0.005462646484375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 459873052, + "step": 3641 + }, + { + "epoch": 0.9342653754890015, + "grad_norm": 50.25830841064453, + "learning_rate": 5e-06, + "loss": 0.9323, + "num_input_tokens_seen": 459999188, + "step": 3642 + }, + { + "epoch": 0.9342653754890015, + "loss": 1.0281184911727905, + "loss_ce": 0.0022396312560886145, + "loss_iou": 0.453125, + "loss_num": 0.02392578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 459999188, + "step": 3642 + }, + { + "epoch": 0.9345219008529468, + "grad_norm": 89.45670318603516, + "learning_rate": 5e-06, + "loss": 0.9455, + "num_input_tokens_seen": 460126464, + "step": 3643 + }, + { + "epoch": 0.9345219008529468, + "loss": 0.8807382583618164, + "loss_ce": 0.0010996382916346192, + "loss_iou": 0.404296875, + "loss_num": 0.013916015625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 460126464, + "step": 3643 + }, + { + "epoch": 0.9347784262168922, + "grad_norm": 66.78504943847656, + "learning_rate": 5e-06, + "loss": 0.9175, + "num_input_tokens_seen": 460253064, + "step": 3644 + }, + { + "epoch": 0.9347784262168922, + "loss": 0.8511741161346436, + "loss_ce": 9.992434934247285e-05, + "loss_iou": 0.408203125, + "loss_num": 0.006591796875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 460253064, + "step": 3644 + }, + { + "epoch": 0.9350349515808376, + "grad_norm": 53.24028015136719, + "learning_rate": 5e-06, + "loss": 0.9803, + "num_input_tokens_seen": 460378032, + "step": 3645 + }, + { + "epoch": 0.9350349515808376, + "loss": 0.9953749179840088, + "loss_ce": 0.0029433807358145714, + "loss_iou": 0.4609375, + "loss_num": 0.01416015625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 460378032, + "step": 3645 + }, + { + "epoch": 0.9352914769447829, + "grad_norm": 51.65861511230469, + "learning_rate": 5e-06, + "loss": 0.8478, + "num_input_tokens_seen": 460504200, + "step": 3646 + }, + { + "epoch": 0.9352914769447829, + "loss": 0.7812744379043579, + "loss_ce": 0.0010010175174102187, + "loss_iou": 0.36328125, + "loss_num": 0.01025390625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 460504200, + "step": 3646 + }, + { + "epoch": 0.9355480023087283, + "grad_norm": 22.881704330444336, + "learning_rate": 5e-06, + "loss": 0.9662, + "num_input_tokens_seen": 460630776, + "step": 3647 + }, + { + "epoch": 0.9355480023087283, + "loss": 1.030810832977295, + "loss_ce": 0.0005374557804316282, + "loss_iou": 0.4765625, + "loss_num": 0.0157470703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 460630776, + "step": 3647 + }, + { + "epoch": 0.9358045276726736, + "grad_norm": 17.687257766723633, + "learning_rate": 5e-06, + "loss": 0.8345, + "num_input_tokens_seen": 460757308, + "step": 3648 + }, + { + "epoch": 0.9358045276726736, + "loss": 0.7762504816055298, + "loss_ce": 0.0037895129062235355, + "loss_iou": 0.357421875, + "loss_num": 0.01116943359375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 460757308, + "step": 3648 + }, + { + "epoch": 0.936061053036619, + "grad_norm": 19.461666107177734, + "learning_rate": 5e-06, + "loss": 0.8649, + "num_input_tokens_seen": 460884004, + "step": 3649 + }, + { + "epoch": 0.936061053036619, + "loss": 1.0872846841812134, + "loss_ce": 0.0003706810239236802, + "loss_iou": 0.490234375, + "loss_num": 0.021240234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 460884004, + "step": 3649 + }, + { + "epoch": 0.9363175784005644, + "grad_norm": 22.722980499267578, + "learning_rate": 5e-06, + "loss": 1.0365, + "num_input_tokens_seen": 461009596, + "step": 3650 + }, + { + "epoch": 0.9363175784005644, + "loss": 1.154809594154358, + "loss_ce": 0.0014892304316163063, + "loss_iou": 0.5234375, + "loss_num": 0.02197265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 461009596, + "step": 3650 + }, + { + "epoch": 0.9365741037645097, + "grad_norm": 42.22420120239258, + "learning_rate": 5e-06, + "loss": 0.9204, + "num_input_tokens_seen": 461135524, + "step": 3651 + }, + { + "epoch": 0.9365741037645097, + "loss": 0.8227266073226929, + "loss_ce": 0.000949179579038173, + "loss_iou": 0.396484375, + "loss_num": 0.006195068359375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 461135524, + "step": 3651 + }, + { + "epoch": 0.936830629128455, + "grad_norm": 50.24898147583008, + "learning_rate": 5e-06, + "loss": 0.8997, + "num_input_tokens_seen": 461261168, + "step": 3652 + }, + { + "epoch": 0.936830629128455, + "loss": 0.9576809406280518, + "loss_ce": 0.0006497344584204257, + "loss_iou": 0.453125, + "loss_num": 0.01019287109375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 461261168, + "step": 3652 + }, + { + "epoch": 0.9370871544924004, + "grad_norm": 43.68164825439453, + "learning_rate": 5e-06, + "loss": 0.8666, + "num_input_tokens_seen": 461387196, + "step": 3653 + }, + { + "epoch": 0.9370871544924004, + "loss": 1.0145068168640137, + "loss_ce": 0.0052294377237558365, + "loss_iou": 0.466796875, + "loss_num": 0.01483154296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 461387196, + "step": 3653 + }, + { + "epoch": 0.9373436798563458, + "grad_norm": 46.75363540649414, + "learning_rate": 5e-06, + "loss": 1.0214, + "num_input_tokens_seen": 461512724, + "step": 3654 + }, + { + "epoch": 0.9373436798563458, + "loss": 0.7811845541000366, + "loss_ce": 0.001399439643137157, + "loss_iou": 0.373046875, + "loss_num": 0.007080078125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 461512724, + "step": 3654 + }, + { + "epoch": 0.9376002052202912, + "grad_norm": 53.401100158691406, + "learning_rate": 5e-06, + "loss": 0.8979, + "num_input_tokens_seen": 461638060, + "step": 3655 + }, + { + "epoch": 0.9376002052202912, + "loss": 1.0050694942474365, + "loss_ce": 0.0009190713753923774, + "loss_iou": 0.462890625, + "loss_num": 0.0157470703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 461638060, + "step": 3655 + }, + { + "epoch": 0.9378567305842365, + "grad_norm": 59.01826858520508, + "learning_rate": 5e-06, + "loss": 0.9241, + "num_input_tokens_seen": 461764684, + "step": 3656 + }, + { + "epoch": 0.9378567305842365, + "loss": 0.933512806892395, + "loss_ce": 0.0013839035527780652, + "loss_iou": 0.439453125, + "loss_num": 0.010498046875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 461764684, + "step": 3656 + }, + { + "epoch": 0.9381132559481818, + "grad_norm": 51.875030517578125, + "learning_rate": 5e-06, + "loss": 0.9358, + "num_input_tokens_seen": 461890704, + "step": 3657 + }, + { + "epoch": 0.9381132559481818, + "loss": 0.9886319637298584, + "loss_ce": 0.0013272779760882258, + "loss_iou": 0.45703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 461890704, + "step": 3657 + }, + { + "epoch": 0.9383697813121272, + "grad_norm": 40.0760498046875, + "learning_rate": 5e-06, + "loss": 0.7648, + "num_input_tokens_seen": 462016656, + "step": 3658 + }, + { + "epoch": 0.9383697813121272, + "loss": 0.7332104444503784, + "loss_ce": 0.0005444451235234737, + "loss_iou": 0.357421875, + "loss_num": 0.0037078857421875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 462016656, + "step": 3658 + }, + { + "epoch": 0.9386263066760726, + "grad_norm": 38.35814666748047, + "learning_rate": 5e-06, + "loss": 0.905, + "num_input_tokens_seen": 462143496, + "step": 3659 + }, + { + "epoch": 0.9386263066760726, + "loss": 0.8020642995834351, + "loss_ce": 0.0007947119302116334, + "loss_iou": 0.3671875, + "loss_num": 0.013427734375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 462143496, + "step": 3659 + }, + { + "epoch": 0.938882832040018, + "grad_norm": 35.964195251464844, + "learning_rate": 5e-06, + "loss": 0.8655, + "num_input_tokens_seen": 462270156, + "step": 3660 + }, + { + "epoch": 0.938882832040018, + "loss": 0.8244752287864685, + "loss_ce": 0.0006226631812751293, + "loss_iou": 0.3828125, + "loss_num": 0.01123046875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 462270156, + "step": 3660 + }, + { + "epoch": 0.9391393574039634, + "grad_norm": 46.9801025390625, + "learning_rate": 5e-06, + "loss": 1.0206, + "num_input_tokens_seen": 462397404, + "step": 3661 + }, + { + "epoch": 0.9391393574039634, + "loss": 0.9656798839569092, + "loss_ce": 0.002300997031852603, + "loss_iou": 0.4375, + "loss_num": 0.01806640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 462397404, + "step": 3661 + }, + { + "epoch": 0.9393958827679086, + "grad_norm": 56.319007873535156, + "learning_rate": 5e-06, + "loss": 0.94, + "num_input_tokens_seen": 462524196, + "step": 3662 + }, + { + "epoch": 0.9393958827679086, + "loss": 1.1229097843170166, + "loss_ce": 0.0003511852119117975, + "loss_iou": 0.50390625, + "loss_num": 0.0223388671875, + "loss_xval": 1.125, + "num_input_tokens_seen": 462524196, + "step": 3662 + }, + { + "epoch": 0.939652408131854, + "grad_norm": 47.455387115478516, + "learning_rate": 5e-06, + "loss": 0.8953, + "num_input_tokens_seen": 462650016, + "step": 3663 + }, + { + "epoch": 0.939652408131854, + "loss": 0.8016010522842407, + "loss_ce": 0.0008197662536986172, + "loss_iou": 0.3828125, + "loss_num": 0.00677490234375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 462650016, + "step": 3663 + }, + { + "epoch": 0.9399089334957994, + "grad_norm": 33.535064697265625, + "learning_rate": 5e-06, + "loss": 0.9892, + "num_input_tokens_seen": 462775232, + "step": 3664 + }, + { + "epoch": 0.9399089334957994, + "loss": 1.0777400732040405, + "loss_ce": 0.0020564687438309193, + "loss_iou": 0.4921875, + "loss_num": 0.0181884765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 462775232, + "step": 3664 + }, + { + "epoch": 0.9401654588597448, + "grad_norm": 44.496517181396484, + "learning_rate": 5e-06, + "loss": 0.9211, + "num_input_tokens_seen": 462902160, + "step": 3665 + }, + { + "epoch": 0.9401654588597448, + "loss": 0.9035837650299072, + "loss_ce": 0.00026341804186813533, + "loss_iou": 0.421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 462902160, + "step": 3665 + }, + { + "epoch": 0.9404219842236902, + "grad_norm": 65.98197937011719, + "learning_rate": 5e-06, + "loss": 0.9368, + "num_input_tokens_seen": 463028444, + "step": 3666 + }, + { + "epoch": 0.9404219842236902, + "loss": 0.8799336552619934, + "loss_ce": 0.0007833061972633004, + "loss_iou": 0.4140625, + "loss_num": 0.01019287109375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 463028444, + "step": 3666 + }, + { + "epoch": 0.9406785095876354, + "grad_norm": 46.119964599609375, + "learning_rate": 5e-06, + "loss": 0.8714, + "num_input_tokens_seen": 463154212, + "step": 3667 + }, + { + "epoch": 0.9406785095876354, + "loss": 1.0372419357299805, + "loss_ce": 0.0003767046728171408, + "loss_iou": 0.484375, + "loss_num": 0.01336669921875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 463154212, + "step": 3667 + }, + { + "epoch": 0.9409350349515808, + "grad_norm": 17.11545181274414, + "learning_rate": 5e-06, + "loss": 0.9543, + "num_input_tokens_seen": 463280008, + "step": 3668 + }, + { + "epoch": 0.9409350349515808, + "loss": 0.9404551386833191, + "loss_ce": 0.0005137299885973334, + "loss_iou": 0.416015625, + "loss_num": 0.021728515625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 463280008, + "step": 3668 + }, + { + "epoch": 0.9411915603155262, + "grad_norm": 18.474149703979492, + "learning_rate": 5e-06, + "loss": 0.8131, + "num_input_tokens_seen": 463406896, + "step": 3669 + }, + { + "epoch": 0.9411915603155262, + "loss": 0.7674820423126221, + "loss_ce": 0.0003921784518752247, + "loss_iou": 0.369140625, + "loss_num": 0.00567626953125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 463406896, + "step": 3669 + }, + { + "epoch": 0.9414480856794716, + "grad_norm": 29.74514389038086, + "learning_rate": 5e-06, + "loss": 0.8518, + "num_input_tokens_seen": 463532576, + "step": 3670 + }, + { + "epoch": 0.9414480856794716, + "loss": 0.948577344417572, + "loss_ce": 0.003020706120878458, + "loss_iou": 0.4296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 463532576, + "step": 3670 + }, + { + "epoch": 0.941704611043417, + "grad_norm": 38.65204620361328, + "learning_rate": 5e-06, + "loss": 0.8229, + "num_input_tokens_seen": 463659208, + "step": 3671 + }, + { + "epoch": 0.941704611043417, + "loss": 0.766069233417511, + "loss_ce": 0.00020010270236525685, + "loss_iou": 0.36328125, + "loss_num": 0.00762939453125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 463659208, + "step": 3671 + }, + { + "epoch": 0.9419611364073622, + "grad_norm": 45.91474151611328, + "learning_rate": 5e-06, + "loss": 0.8713, + "num_input_tokens_seen": 463786220, + "step": 3672 + }, + { + "epoch": 0.9419611364073622, + "loss": 0.8210895657539368, + "loss_ce": 0.0005329824052751064, + "loss_iou": 0.38671875, + "loss_num": 0.00982666015625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 463786220, + "step": 3672 + }, + { + "epoch": 0.9422176617713076, + "grad_norm": 51.34685134887695, + "learning_rate": 5e-06, + "loss": 0.8784, + "num_input_tokens_seen": 463912412, + "step": 3673 + }, + { + "epoch": 0.9422176617713076, + "loss": 0.9496025443077087, + "loss_ce": 0.00038384145591408014, + "loss_iou": 0.435546875, + "loss_num": 0.0155029296875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 463912412, + "step": 3673 + }, + { + "epoch": 0.942474187135253, + "grad_norm": 49.33169937133789, + "learning_rate": 5e-06, + "loss": 0.8636, + "num_input_tokens_seen": 464038364, + "step": 3674 + }, + { + "epoch": 0.942474187135253, + "loss": 0.9540407657623291, + "loss_ce": 0.0018923444440588355, + "loss_iou": 0.451171875, + "loss_num": 0.00982666015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 464038364, + "step": 3674 + }, + { + "epoch": 0.9427307124991984, + "grad_norm": 56.22955322265625, + "learning_rate": 5e-06, + "loss": 0.9139, + "num_input_tokens_seen": 464164724, + "step": 3675 + }, + { + "epoch": 0.9427307124991984, + "loss": 0.9937374591827393, + "loss_ce": 0.0008175497641786933, + "loss_iou": 0.462890625, + "loss_num": 0.01336669921875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 464164724, + "step": 3675 + }, + { + "epoch": 0.9429872378631438, + "grad_norm": 42.889610290527344, + "learning_rate": 5e-06, + "loss": 0.8155, + "num_input_tokens_seen": 464290780, + "step": 3676 + }, + { + "epoch": 0.9429872378631438, + "loss": 0.8258544206619263, + "loss_ce": 0.004565386101603508, + "loss_iou": 0.380859375, + "loss_num": 0.0120849609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 464290780, + "step": 3676 + }, + { + "epoch": 0.943243763227089, + "grad_norm": 28.904233932495117, + "learning_rate": 5e-06, + "loss": 0.8226, + "num_input_tokens_seen": 464417488, + "step": 3677 + }, + { + "epoch": 0.943243763227089, + "loss": 0.7690805792808533, + "loss_ce": 0.0015024568419903517, + "loss_iou": 0.353515625, + "loss_num": 0.01202392578125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 464417488, + "step": 3677 + }, + { + "epoch": 0.9435002885910344, + "grad_norm": 24.394237518310547, + "learning_rate": 5e-06, + "loss": 0.899, + "num_input_tokens_seen": 464542888, + "step": 3678 + }, + { + "epoch": 0.9435002885910344, + "loss": 0.9014326333999634, + "loss_ce": 6.545497308252379e-05, + "loss_iou": 0.419921875, + "loss_num": 0.01202392578125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 464542888, + "step": 3678 + }, + { + "epoch": 0.9437568139549798, + "grad_norm": 36.753684997558594, + "learning_rate": 5e-06, + "loss": 0.8346, + "num_input_tokens_seen": 464668952, + "step": 3679 + }, + { + "epoch": 0.9437568139549798, + "loss": 0.839881181716919, + "loss_ce": 0.0010140028316527605, + "loss_iou": 0.404296875, + "loss_num": 0.00634765625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 464668952, + "step": 3679 + }, + { + "epoch": 0.9440133393189252, + "grad_norm": 43.150936126708984, + "learning_rate": 5e-06, + "loss": 0.9304, + "num_input_tokens_seen": 464795268, + "step": 3680 + }, + { + "epoch": 0.9440133393189252, + "loss": 0.7612678408622742, + "loss_ce": 0.001013916451483965, + "loss_iou": 0.361328125, + "loss_num": 0.00762939453125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 464795268, + "step": 3680 + }, + { + "epoch": 0.9442698646828706, + "grad_norm": 46.340431213378906, + "learning_rate": 5e-06, + "loss": 0.9256, + "num_input_tokens_seen": 464921296, + "step": 3681 + }, + { + "epoch": 0.9442698646828706, + "loss": 0.9561992287635803, + "loss_ce": 0.0020976890809834003, + "loss_iou": 0.42578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 464921296, + "step": 3681 + }, + { + "epoch": 0.9445263900468159, + "grad_norm": 38.905548095703125, + "learning_rate": 5e-06, + "loss": 0.9243, + "num_input_tokens_seen": 465045980, + "step": 3682 + }, + { + "epoch": 0.9445263900468159, + "loss": 1.012319803237915, + "loss_ce": 0.00011273652489762753, + "loss_iou": 0.4609375, + "loss_num": 0.017822265625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 465045980, + "step": 3682 + }, + { + "epoch": 0.9447829154107612, + "grad_norm": 40.27955627441406, + "learning_rate": 5e-06, + "loss": 0.7801, + "num_input_tokens_seen": 465172884, + "step": 3683 + }, + { + "epoch": 0.9447829154107612, + "loss": 0.842250406742096, + "loss_ce": 0.0009418433764949441, + "loss_iou": 0.3828125, + "loss_num": 0.01531982421875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 465172884, + "step": 3683 + }, + { + "epoch": 0.9450394407747066, + "grad_norm": 43.6458740234375, + "learning_rate": 5e-06, + "loss": 1.0094, + "num_input_tokens_seen": 465298676, + "step": 3684 + }, + { + "epoch": 0.9450394407747066, + "loss": 1.2069745063781738, + "loss_ce": 0.00043152051512151957, + "loss_iou": 0.55859375, + "loss_num": 0.0179443359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 465298676, + "step": 3684 + }, + { + "epoch": 0.945295966138652, + "grad_norm": 61.86376190185547, + "learning_rate": 5e-06, + "loss": 0.874, + "num_input_tokens_seen": 465425952, + "step": 3685 + }, + { + "epoch": 0.945295966138652, + "loss": 0.7857323288917542, + "loss_ce": 0.006923707202076912, + "loss_iou": 0.37109375, + "loss_num": 0.007537841796875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 465425952, + "step": 3685 + }, + { + "epoch": 0.9455524915025973, + "grad_norm": 46.10440444946289, + "learning_rate": 5e-06, + "loss": 0.9371, + "num_input_tokens_seen": 465552716, + "step": 3686 + }, + { + "epoch": 0.9455524915025973, + "loss": 0.8794236183166504, + "loss_ce": 0.0010056063765659928, + "loss_iou": 0.412109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 465552716, + "step": 3686 + }, + { + "epoch": 0.9458090168665427, + "grad_norm": 27.472898483276367, + "learning_rate": 5e-06, + "loss": 0.9771, + "num_input_tokens_seen": 465680000, + "step": 3687 + }, + { + "epoch": 0.9458090168665427, + "loss": 0.8636884689331055, + "loss_ce": 0.001139703905209899, + "loss_iou": 0.392578125, + "loss_num": 0.01544189453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 465680000, + "step": 3687 + }, + { + "epoch": 0.946065542230488, + "grad_norm": 33.50684356689453, + "learning_rate": 5e-06, + "loss": 0.8289, + "num_input_tokens_seen": 465805816, + "step": 3688 + }, + { + "epoch": 0.946065542230488, + "loss": 0.8775566220283508, + "loss_ce": 0.0006034750258550048, + "loss_iou": 0.41796875, + "loss_num": 0.008056640625, + "loss_xval": 0.875, + "num_input_tokens_seen": 465805816, + "step": 3688 + }, + { + "epoch": 0.9463220675944334, + "grad_norm": 43.293846130371094, + "learning_rate": 5e-06, + "loss": 0.8586, + "num_input_tokens_seen": 465931704, + "step": 3689 + }, + { + "epoch": 0.9463220675944334, + "loss": 0.7833787798881531, + "loss_ce": 0.00017566840688232332, + "loss_iou": 0.375, + "loss_num": 0.006805419921875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 465931704, + "step": 3689 + }, + { + "epoch": 0.9465785929583788, + "grad_norm": 40.77540588378906, + "learning_rate": 5e-06, + "loss": 0.839, + "num_input_tokens_seen": 466058468, + "step": 3690 + }, + { + "epoch": 0.9465785929583788, + "loss": 1.017223596572876, + "loss_ce": 0.0006221048533916473, + "loss_iou": 0.46484375, + "loss_num": 0.017578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 466058468, + "step": 3690 + }, + { + "epoch": 0.9468351183223241, + "grad_norm": 59.32502746582031, + "learning_rate": 5e-06, + "loss": 0.9095, + "num_input_tokens_seen": 466185296, + "step": 3691 + }, + { + "epoch": 0.9468351183223241, + "loss": 0.7418409585952759, + "loss_ce": 0.00648941146209836, + "loss_iou": 0.353515625, + "loss_num": 0.0057373046875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 466185296, + "step": 3691 + }, + { + "epoch": 0.9470916436862695, + "grad_norm": 43.48308181762695, + "learning_rate": 5e-06, + "loss": 0.8813, + "num_input_tokens_seen": 466311512, + "step": 3692 + }, + { + "epoch": 0.9470916436862695, + "loss": 1.031076192855835, + "loss_ce": 0.0022675730288028717, + "loss_iou": 0.46875, + "loss_num": 0.0184326171875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 466311512, + "step": 3692 + }, + { + "epoch": 0.9473481690502148, + "grad_norm": 23.717506408691406, + "learning_rate": 5e-06, + "loss": 0.7842, + "num_input_tokens_seen": 466437752, + "step": 3693 + }, + { + "epoch": 0.9473481690502148, + "loss": 0.7203108072280884, + "loss_ce": 0.00046216571354307234, + "loss_iou": 0.326171875, + "loss_num": 0.0135498046875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 466437752, + "step": 3693 + }, + { + "epoch": 0.9476046944141602, + "grad_norm": 42.92779541015625, + "learning_rate": 5e-06, + "loss": 1.0188, + "num_input_tokens_seen": 466563664, + "step": 3694 + }, + { + "epoch": 0.9476046944141602, + "loss": 1.1214138269424438, + "loss_ce": 0.00032010371796786785, + "loss_iou": 0.515625, + "loss_num": 0.017333984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 466563664, + "step": 3694 + }, + { + "epoch": 0.9478612197781056, + "grad_norm": 56.469242095947266, + "learning_rate": 5e-06, + "loss": 0.8992, + "num_input_tokens_seen": 466689696, + "step": 3695 + }, + { + "epoch": 0.9478612197781056, + "loss": 0.9403088092803955, + "loss_ce": 0.0008556349202990532, + "loss_iou": 0.431640625, + "loss_num": 0.01519775390625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 466689696, + "step": 3695 + }, + { + "epoch": 0.9481177451420509, + "grad_norm": 47.9663200378418, + "learning_rate": 5e-06, + "loss": 0.9895, + "num_input_tokens_seen": 466816772, + "step": 3696 + }, + { + "epoch": 0.9481177451420509, + "loss": 1.106593370437622, + "loss_ce": 0.0016128328861668706, + "loss_iou": 0.51171875, + "loss_num": 0.0169677734375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 466816772, + "step": 3696 + }, + { + "epoch": 0.9483742705059963, + "grad_norm": 47.70061492919922, + "learning_rate": 5e-06, + "loss": 0.8814, + "num_input_tokens_seen": 466943316, + "step": 3697 + }, + { + "epoch": 0.9483742705059963, + "loss": 0.8160202503204346, + "loss_ce": 0.00010233210196020082, + "loss_iou": 0.37109375, + "loss_num": 0.01531982421875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 466943316, + "step": 3697 + }, + { + "epoch": 0.9486307958699416, + "grad_norm": 42.98957061767578, + "learning_rate": 5e-06, + "loss": 0.9469, + "num_input_tokens_seen": 467069748, + "step": 3698 + }, + { + "epoch": 0.9486307958699416, + "loss": 0.6683716177940369, + "loss_ce": 0.0006470466614700854, + "loss_iou": 0.322265625, + "loss_num": 0.00433349609375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 467069748, + "step": 3698 + }, + { + "epoch": 0.948887321233887, + "grad_norm": 32.64594650268555, + "learning_rate": 5e-06, + "loss": 0.9006, + "num_input_tokens_seen": 467196448, + "step": 3699 + }, + { + "epoch": 0.948887321233887, + "loss": 0.7862898111343384, + "loss_ce": 0.0006452803499996662, + "loss_iou": 0.365234375, + "loss_num": 0.01104736328125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 467196448, + "step": 3699 + }, + { + "epoch": 0.9491438465978324, + "grad_norm": 37.68442916870117, + "learning_rate": 5e-06, + "loss": 0.9414, + "num_input_tokens_seen": 467323660, + "step": 3700 + }, + { + "epoch": 0.9491438465978324, + "loss": 0.9298077821731567, + "loss_ce": 0.002561731729656458, + "loss_iou": 0.421875, + "loss_num": 0.0169677734375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 467323660, + "step": 3700 + }, + { + "epoch": 0.9494003719617777, + "grad_norm": 47.4990348815918, + "learning_rate": 5e-06, + "loss": 0.8805, + "num_input_tokens_seen": 467450212, + "step": 3701 + }, + { + "epoch": 0.9494003719617777, + "loss": 0.9245873689651489, + "loss_ce": 0.0024681999348104, + "loss_iou": 0.41796875, + "loss_num": 0.017333984375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 467450212, + "step": 3701 + }, + { + "epoch": 0.9496568973257231, + "grad_norm": 42.406986236572266, + "learning_rate": 5e-06, + "loss": 0.8944, + "num_input_tokens_seen": 467576284, + "step": 3702 + }, + { + "epoch": 0.9496568973257231, + "loss": 0.7691506147384644, + "loss_ce": 0.00035180055419914424, + "loss_iou": 0.361328125, + "loss_num": 0.009033203125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 467576284, + "step": 3702 + }, + { + "epoch": 0.9499134226896685, + "grad_norm": 37.537933349609375, + "learning_rate": 5e-06, + "loss": 0.8845, + "num_input_tokens_seen": 467703352, + "step": 3703 + }, + { + "epoch": 0.9499134226896685, + "loss": 1.1004343032836914, + "loss_ce": 0.004731159191578627, + "loss_iou": 0.4921875, + "loss_num": 0.0223388671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 467703352, + "step": 3703 + }, + { + "epoch": 0.9501699480536138, + "grad_norm": 41.88649368286133, + "learning_rate": 5e-06, + "loss": 0.9663, + "num_input_tokens_seen": 467829652, + "step": 3704 + }, + { + "epoch": 0.9501699480536138, + "loss": 0.9605839848518372, + "loss_ce": 0.0006230776780284941, + "loss_iou": 0.44140625, + "loss_num": 0.015869140625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 467829652, + "step": 3704 + }, + { + "epoch": 0.9504264734175591, + "grad_norm": 68.59712219238281, + "learning_rate": 5e-06, + "loss": 0.9565, + "num_input_tokens_seen": 467955092, + "step": 3705 + }, + { + "epoch": 0.9504264734175591, + "loss": 0.8306628465652466, + "loss_ce": 0.002049524337053299, + "loss_iou": 0.39453125, + "loss_num": 0.00775146484375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 467955092, + "step": 3705 + }, + { + "epoch": 0.9506829987815045, + "grad_norm": 52.74204635620117, + "learning_rate": 5e-06, + "loss": 0.9608, + "num_input_tokens_seen": 468081248, + "step": 3706 + }, + { + "epoch": 0.9506829987815045, + "loss": 0.969720721244812, + "loss_ce": 0.00048247570521198213, + "loss_iou": 0.439453125, + "loss_num": 0.018310546875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 468081248, + "step": 3706 + }, + { + "epoch": 0.9509395241454499, + "grad_norm": 52.946876525878906, + "learning_rate": 5e-06, + "loss": 1.0146, + "num_input_tokens_seen": 468208124, + "step": 3707 + }, + { + "epoch": 0.9509395241454499, + "loss": 0.933703601360321, + "loss_ce": 0.000598156766500324, + "loss_iou": 0.4375, + "loss_num": 0.0115966796875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 468208124, + "step": 3707 + }, + { + "epoch": 0.9511960495093953, + "grad_norm": 35.36286163330078, + "learning_rate": 5e-06, + "loss": 0.9431, + "num_input_tokens_seen": 468333596, + "step": 3708 + }, + { + "epoch": 0.9511960495093953, + "loss": 0.8565495610237122, + "loss_ce": 0.000592526514083147, + "loss_iou": 0.392578125, + "loss_num": 0.01416015625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 468333596, + "step": 3708 + }, + { + "epoch": 0.9514525748733406, + "grad_norm": 28.405380249023438, + "learning_rate": 5e-06, + "loss": 0.7674, + "num_input_tokens_seen": 468460872, + "step": 3709 + }, + { + "epoch": 0.9514525748733406, + "loss": 0.7805876731872559, + "loss_ce": 0.000802493654191494, + "loss_iou": 0.357421875, + "loss_num": 0.0128173828125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 468460872, + "step": 3709 + }, + { + "epoch": 0.9517091002372859, + "grad_norm": 28.12193489074707, + "learning_rate": 5e-06, + "loss": 0.8429, + "num_input_tokens_seen": 468586144, + "step": 3710 + }, + { + "epoch": 0.9517091002372859, + "loss": 0.8768465518951416, + "loss_ce": 0.0006257933564484119, + "loss_iou": 0.412109375, + "loss_num": 0.0103759765625, + "loss_xval": 0.875, + "num_input_tokens_seen": 468586144, + "step": 3710 + }, + { + "epoch": 0.9519656256012313, + "grad_norm": 39.958927154541016, + "learning_rate": 5e-06, + "loss": 0.8794, + "num_input_tokens_seen": 468711804, + "step": 3711 + }, + { + "epoch": 0.9519656256012313, + "loss": 0.9256656169891357, + "loss_ce": 0.0006167399697005749, + "loss_iou": 0.421875, + "loss_num": 0.015869140625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 468711804, + "step": 3711 + }, + { + "epoch": 0.9522221509651767, + "grad_norm": 44.818870544433594, + "learning_rate": 5e-06, + "loss": 0.8668, + "num_input_tokens_seen": 468838132, + "step": 3712 + }, + { + "epoch": 0.9522221509651767, + "loss": 0.9414329528808594, + "loss_ce": 0.000514992862008512, + "loss_iou": 0.431640625, + "loss_num": 0.0152587890625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 468838132, + "step": 3712 + }, + { + "epoch": 0.9524786763291221, + "grad_norm": 33.827354431152344, + "learning_rate": 5e-06, + "loss": 0.9093, + "num_input_tokens_seen": 468964208, + "step": 3713 + }, + { + "epoch": 0.9524786763291221, + "loss": 0.9042959213256836, + "loss_ce": 0.0026845782995224, + "loss_iou": 0.419921875, + "loss_num": 0.0126953125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 468964208, + "step": 3713 + }, + { + "epoch": 0.9527352016930674, + "grad_norm": 46.68196487426758, + "learning_rate": 5e-06, + "loss": 0.8385, + "num_input_tokens_seen": 469091080, + "step": 3714 + }, + { + "epoch": 0.9527352016930674, + "loss": 0.8529127836227417, + "loss_ce": 0.0003737600054591894, + "loss_iou": 0.408203125, + "loss_num": 0.00701904296875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 469091080, + "step": 3714 + }, + { + "epoch": 0.9529917270570127, + "grad_norm": 68.15245819091797, + "learning_rate": 5e-06, + "loss": 1.0186, + "num_input_tokens_seen": 469216920, + "step": 3715 + }, + { + "epoch": 0.9529917270570127, + "loss": 1.0548653602600098, + "loss_ce": 0.0016427828231826425, + "loss_iou": 0.486328125, + "loss_num": 0.0162353515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 469216920, + "step": 3715 + }, + { + "epoch": 0.9532482524209581, + "grad_norm": 53.18130111694336, + "learning_rate": 5e-06, + "loss": 0.9276, + "num_input_tokens_seen": 469344556, + "step": 3716 + }, + { + "epoch": 0.9532482524209581, + "loss": 1.0641156435012817, + "loss_ce": 0.004301172681152821, + "loss_iou": 0.462890625, + "loss_num": 0.0267333984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 469344556, + "step": 3716 + }, + { + "epoch": 0.9535047777849035, + "grad_norm": 42.49665069580078, + "learning_rate": 5e-06, + "loss": 1.0328, + "num_input_tokens_seen": 469469620, + "step": 3717 + }, + { + "epoch": 0.9535047777849035, + "loss": 1.0054821968078613, + "loss_ce": 0.0015759584493935108, + "loss_iou": 0.46484375, + "loss_num": 0.01513671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 469469620, + "step": 3717 + }, + { + "epoch": 0.9537613031488489, + "grad_norm": 30.475875854492188, + "learning_rate": 5e-06, + "loss": 0.8971, + "num_input_tokens_seen": 469594732, + "step": 3718 + }, + { + "epoch": 0.9537613031488489, + "loss": 0.8059152364730835, + "loss_ce": 0.0002511662896722555, + "loss_iou": 0.3828125, + "loss_num": 0.00811767578125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 469594732, + "step": 3718 + }, + { + "epoch": 0.9540178285127942, + "grad_norm": 38.36625289916992, + "learning_rate": 5e-06, + "loss": 0.9594, + "num_input_tokens_seen": 469721000, + "step": 3719 + }, + { + "epoch": 0.9540178285127942, + "loss": 0.9581032395362854, + "loss_ce": 0.000583761720918119, + "loss_iou": 0.439453125, + "loss_num": 0.015869140625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 469721000, + "step": 3719 + }, + { + "epoch": 0.9542743538767395, + "grad_norm": 38.52410125732422, + "learning_rate": 5e-06, + "loss": 0.9408, + "num_input_tokens_seen": 469846920, + "step": 3720 + }, + { + "epoch": 0.9542743538767395, + "loss": 0.8575612306594849, + "loss_ce": 0.0006276440690271556, + "loss_iou": 0.39453125, + "loss_num": 0.01312255859375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 469846920, + "step": 3720 + }, + { + "epoch": 0.9545308792406849, + "grad_norm": 49.05297088623047, + "learning_rate": 5e-06, + "loss": 0.9711, + "num_input_tokens_seen": 469973608, + "step": 3721 + }, + { + "epoch": 0.9545308792406849, + "loss": 0.8868104219436646, + "loss_ce": 0.004730363842099905, + "loss_iou": 0.408203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 469973608, + "step": 3721 + }, + { + "epoch": 0.9547874046046303, + "grad_norm": 49.10481643676758, + "learning_rate": 5e-06, + "loss": 0.9803, + "num_input_tokens_seen": 470100028, + "step": 3722 + }, + { + "epoch": 0.9547874046046303, + "loss": 1.051772117614746, + "loss_ce": 0.0005025180871598423, + "loss_iou": 0.490234375, + "loss_num": 0.013671875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 470100028, + "step": 3722 + }, + { + "epoch": 0.9550439299685757, + "grad_norm": 44.24982452392578, + "learning_rate": 5e-06, + "loss": 0.9829, + "num_input_tokens_seen": 470226356, + "step": 3723 + }, + { + "epoch": 0.9550439299685757, + "loss": 1.1333545446395874, + "loss_ce": 0.0015185597585514188, + "loss_iou": 0.5234375, + "loss_num": 0.017578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 470226356, + "step": 3723 + }, + { + "epoch": 0.955300455332521, + "grad_norm": 57.111114501953125, + "learning_rate": 5e-06, + "loss": 0.81, + "num_input_tokens_seen": 470353288, + "step": 3724 + }, + { + "epoch": 0.955300455332521, + "loss": 0.9511011242866516, + "loss_ce": 0.0006617032922804356, + "loss_iou": 0.4453125, + "loss_num": 0.01220703125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 470353288, + "step": 3724 + }, + { + "epoch": 0.9555569806964663, + "grad_norm": 56.31776428222656, + "learning_rate": 5e-06, + "loss": 0.8451, + "num_input_tokens_seen": 470479236, + "step": 3725 + }, + { + "epoch": 0.9555569806964663, + "loss": 0.8598451614379883, + "loss_ce": 0.0004702183650806546, + "loss_iou": 0.404296875, + "loss_num": 0.0096435546875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 470479236, + "step": 3725 + }, + { + "epoch": 0.9558135060604117, + "grad_norm": 55.03281021118164, + "learning_rate": 5e-06, + "loss": 0.9499, + "num_input_tokens_seen": 470606296, + "step": 3726 + }, + { + "epoch": 0.9558135060604117, + "loss": 0.8283292651176453, + "loss_ce": 0.0031339661218225956, + "loss_iou": 0.36328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 470606296, + "step": 3726 + }, + { + "epoch": 0.9560700314243571, + "grad_norm": 48.802059173583984, + "learning_rate": 5e-06, + "loss": 0.9125, + "num_input_tokens_seen": 470731420, + "step": 3727 + }, + { + "epoch": 0.9560700314243571, + "loss": 1.2767093181610107, + "loss_ce": 0.000830433564260602, + "loss_iou": 0.57421875, + "loss_num": 0.025634765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 470731420, + "step": 3727 + }, + { + "epoch": 0.9563265567883025, + "grad_norm": 49.176475524902344, + "learning_rate": 5e-06, + "loss": 0.8638, + "num_input_tokens_seen": 470857928, + "step": 3728 + }, + { + "epoch": 0.9563265567883025, + "loss": 0.7283087372779846, + "loss_ce": 0.00028139573987573385, + "loss_iou": 0.3515625, + "loss_num": 0.004608154296875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 470857928, + "step": 3728 + }, + { + "epoch": 0.9565830821522479, + "grad_norm": 55.19975662231445, + "learning_rate": 5e-06, + "loss": 0.8153, + "num_input_tokens_seen": 470983320, + "step": 3729 + }, + { + "epoch": 0.9565830821522479, + "loss": 0.9633890390396118, + "loss_ce": 0.001963268034160137, + "loss_iou": 0.455078125, + "loss_num": 0.0101318359375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 470983320, + "step": 3729 + }, + { + "epoch": 0.9568396075161931, + "grad_norm": 46.52829360961914, + "learning_rate": 5e-06, + "loss": 1.007, + "num_input_tokens_seen": 471109344, + "step": 3730 + }, + { + "epoch": 0.9568396075161931, + "loss": 1.0144968032836914, + "loss_ce": 0.004731196444481611, + "loss_iou": 0.458984375, + "loss_num": 0.01806640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 471109344, + "step": 3730 + }, + { + "epoch": 0.9570961328801385, + "grad_norm": 37.07002639770508, + "learning_rate": 5e-06, + "loss": 0.9242, + "num_input_tokens_seen": 471235136, + "step": 3731 + }, + { + "epoch": 0.9570961328801385, + "loss": 0.9647266864776611, + "loss_ce": 0.0003712321340572089, + "loss_iou": 0.4453125, + "loss_num": 0.01434326171875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 471235136, + "step": 3731 + }, + { + "epoch": 0.9573526582440839, + "grad_norm": 33.90400695800781, + "learning_rate": 5e-06, + "loss": 0.9179, + "num_input_tokens_seen": 471360944, + "step": 3732 + }, + { + "epoch": 0.9573526582440839, + "loss": 1.016692876815796, + "loss_ce": 0.0020444290712475777, + "loss_iou": 0.435546875, + "loss_num": 0.02880859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 471360944, + "step": 3732 + }, + { + "epoch": 0.9576091836080293, + "grad_norm": 35.045352935791016, + "learning_rate": 5e-06, + "loss": 0.9199, + "num_input_tokens_seen": 471487496, + "step": 3733 + }, + { + "epoch": 0.9576091836080293, + "loss": 0.8688026666641235, + "loss_ce": 0.0006386188324540854, + "loss_iou": 0.40625, + "loss_num": 0.01123046875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 471487496, + "step": 3733 + }, + { + "epoch": 0.9578657089719746, + "grad_norm": 45.78194046020508, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 471613500, + "step": 3734 + }, + { + "epoch": 0.9578657089719746, + "loss": 1.1330904960632324, + "loss_ce": 0.003695887280628085, + "loss_iou": 0.51171875, + "loss_num": 0.0218505859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 471613500, + "step": 3734 + }, + { + "epoch": 0.9581222343359199, + "grad_norm": 53.44545364379883, + "learning_rate": 5e-06, + "loss": 0.85, + "num_input_tokens_seen": 471739428, + "step": 3735 + }, + { + "epoch": 0.9581222343359199, + "loss": 0.8890461921691895, + "loss_ce": 0.008919235318899155, + "loss_iou": 0.39453125, + "loss_num": 0.018310546875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 471739428, + "step": 3735 + }, + { + "epoch": 0.9583787596998653, + "grad_norm": 43.31914138793945, + "learning_rate": 5e-06, + "loss": 0.9573, + "num_input_tokens_seen": 471865228, + "step": 3736 + }, + { + "epoch": 0.9583787596998653, + "loss": 1.0277109146118164, + "loss_ce": 0.0006113672279752791, + "loss_iou": 0.4765625, + "loss_num": 0.01519775390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 471865228, + "step": 3736 + }, + { + "epoch": 0.9586352850638107, + "grad_norm": 46.960105895996094, + "learning_rate": 5e-06, + "loss": 0.8798, + "num_input_tokens_seen": 471991716, + "step": 3737 + }, + { + "epoch": 0.9586352850638107, + "loss": 0.848606526851654, + "loss_ce": 0.0037578989285975695, + "loss_iou": 0.36328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 471991716, + "step": 3737 + }, + { + "epoch": 0.9588918104277561, + "grad_norm": 42.24656677246094, + "learning_rate": 5e-06, + "loss": 0.9303, + "num_input_tokens_seen": 472117944, + "step": 3738 + }, + { + "epoch": 0.9588918104277561, + "loss": 0.7858527898788452, + "loss_ce": 0.0016731544164940715, + "loss_iou": 0.35546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 472117944, + "step": 3738 + }, + { + "epoch": 0.9591483357917014, + "grad_norm": 40.11541748046875, + "learning_rate": 5e-06, + "loss": 0.8473, + "num_input_tokens_seen": 472245048, + "step": 3739 + }, + { + "epoch": 0.9591483357917014, + "loss": 0.7120316028594971, + "loss_ce": 0.001582384342327714, + "loss_iou": 0.330078125, + "loss_num": 0.01025390625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 472245048, + "step": 3739 + }, + { + "epoch": 0.9594048611556467, + "grad_norm": 48.304927825927734, + "learning_rate": 5e-06, + "loss": 0.8505, + "num_input_tokens_seen": 472370164, + "step": 3740 + }, + { + "epoch": 0.9594048611556467, + "loss": 0.7848272323608398, + "loss_ce": 0.0004034331941511482, + "loss_iou": 0.361328125, + "loss_num": 0.01226806640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 472370164, + "step": 3740 + }, + { + "epoch": 0.9596613865195921, + "grad_norm": 64.52574920654297, + "learning_rate": 5e-06, + "loss": 0.928, + "num_input_tokens_seen": 472496728, + "step": 3741 + }, + { + "epoch": 0.9596613865195921, + "loss": 1.0613837242126465, + "loss_ce": 0.0027899255510419607, + "loss_iou": 0.482421875, + "loss_num": 0.018310546875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 472496728, + "step": 3741 + }, + { + "epoch": 0.9599179118835375, + "grad_norm": 59.014198303222656, + "learning_rate": 5e-06, + "loss": 0.9887, + "num_input_tokens_seen": 472623668, + "step": 3742 + }, + { + "epoch": 0.9599179118835375, + "loss": 0.9604536294937134, + "loss_ce": 0.0009809542680159211, + "loss_iou": 0.44921875, + "loss_num": 0.012451171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 472623668, + "step": 3742 + }, + { + "epoch": 0.9601744372474829, + "grad_norm": 39.682281494140625, + "learning_rate": 5e-06, + "loss": 0.9474, + "num_input_tokens_seen": 472748204, + "step": 3743 + }, + { + "epoch": 0.9601744372474829, + "loss": 1.0365900993347168, + "loss_ce": 0.0009454383980482817, + "loss_iou": 0.466796875, + "loss_num": 0.019775390625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 472748204, + "step": 3743 + }, + { + "epoch": 0.9604309626114282, + "grad_norm": 42.579471588134766, + "learning_rate": 5e-06, + "loss": 0.8822, + "num_input_tokens_seen": 472875364, + "step": 3744 + }, + { + "epoch": 0.9604309626114282, + "loss": 0.9522095322608948, + "loss_ce": 0.00030522566521540284, + "loss_iou": 0.44140625, + "loss_num": 0.0140380859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 472875364, + "step": 3744 + }, + { + "epoch": 0.9606874879753735, + "grad_norm": 50.972434997558594, + "learning_rate": 5e-06, + "loss": 0.9854, + "num_input_tokens_seen": 473001916, + "step": 3745 + }, + { + "epoch": 0.9606874879753735, + "loss": 1.01099693775177, + "loss_ce": 0.0007430281257256866, + "loss_iou": 0.4609375, + "loss_num": 0.01806640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 473001916, + "step": 3745 + }, + { + "epoch": 0.9609440133393189, + "grad_norm": 42.45807647705078, + "learning_rate": 5e-06, + "loss": 0.8632, + "num_input_tokens_seen": 473128576, + "step": 3746 + }, + { + "epoch": 0.9609440133393189, + "loss": 0.7900751829147339, + "loss_ce": 0.0005243601626716554, + "loss_iou": 0.380859375, + "loss_num": 0.005889892578125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 473128576, + "step": 3746 + }, + { + "epoch": 0.9612005387032643, + "grad_norm": 68.20001983642578, + "learning_rate": 5e-06, + "loss": 0.8823, + "num_input_tokens_seen": 473256068, + "step": 3747 + }, + { + "epoch": 0.9612005387032643, + "loss": 0.804835319519043, + "loss_ce": 0.00014778485638089478, + "loss_iou": 0.380859375, + "loss_num": 0.00830078125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 473256068, + "step": 3747 + }, + { + "epoch": 0.9614570640672097, + "grad_norm": 47.22114562988281, + "learning_rate": 5e-06, + "loss": 0.8793, + "num_input_tokens_seen": 473382484, + "step": 3748 + }, + { + "epoch": 0.9614570640672097, + "loss": 0.7414042949676514, + "loss_ce": 0.00019337376579642296, + "loss_iou": 0.349609375, + "loss_num": 0.0084228515625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 473382484, + "step": 3748 + }, + { + "epoch": 0.961713589431155, + "grad_norm": 18.614967346191406, + "learning_rate": 5e-06, + "loss": 0.8882, + "num_input_tokens_seen": 473509912, + "step": 3749 + }, + { + "epoch": 0.961713589431155, + "loss": 0.8926327228546143, + "loss_ce": 0.0027401724364608526, + "loss_iou": 0.40625, + "loss_num": 0.01544189453125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 473509912, + "step": 3749 + }, + { + "epoch": 0.9619701147951004, + "grad_norm": 18.26949691772461, + "learning_rate": 5e-06, + "loss": 0.8563, + "num_input_tokens_seen": 473637088, + "step": 3750 + }, + { + "epoch": 0.9619701147951004, + "eval_icons_CIoU": 0.28159932792186737, + "eval_icons_GIoU": 0.23562762141227722, + "eval_icons_IoU": 0.4743051379919052, + "eval_icons_MAE_all": 0.028068319894373417, + "eval_icons_MAE_h": 0.03759913891553879, + "eval_icons_MAE_w": 0.06105516292154789, + "eval_icons_MAE_x_boxes": 0.05570170655846596, + "eval_icons_MAE_y_boxes": 0.03951370343565941, + "eval_icons_NUM_probability": 0.9995871186256409, + "eval_icons_inside_bbox": 0.6805555522441864, + "eval_icons_loss": 1.634150505065918, + "eval_icons_loss_ce": 7.489781819458585e-05, + "eval_icons_loss_iou": 0.743896484375, + "eval_icons_loss_num": 0.030408859252929688, + "eval_icons_loss_xval": 1.640625, + "eval_icons_runtime": 58.2421, + "eval_icons_samples_per_second": 0.858, + "eval_icons_steps_per_second": 0.034, + "num_input_tokens_seen": 473637088, + "step": 3750 + }, + { + "epoch": 0.9619701147951004, + "eval_screenspot_CIoU": 0.139450969795386, + "eval_screenspot_GIoU": 0.12380100786685944, + "eval_screenspot_IoU": 0.3015509694814682, + "eval_screenspot_MAE_all": 0.07744511092702548, + "eval_screenspot_MAE_h": 0.0702316810687383, + "eval_screenspot_MAE_w": 0.1192569633324941, + "eval_screenspot_MAE_x_boxes": 0.1045909399787585, + "eval_screenspot_MAE_y_boxes": 0.05536154036720594, + "eval_screenspot_NUM_probability": 0.9999455809593201, + "eval_screenspot_inside_bbox": 0.6016666690508524, + "eval_screenspot_loss": 2.1740405559539795, + "eval_screenspot_loss_ce": 0.0010580015562785168, + "eval_screenspot_loss_iou": 0.8953450520833334, + "eval_screenspot_loss_num": 0.08156585693359375, + "eval_screenspot_loss_xval": 2.1985677083333335, + "eval_screenspot_runtime": 111.2254, + "eval_screenspot_samples_per_second": 0.8, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 473637088, + "step": 3750 + }, + { + "epoch": 0.9619701147951004, + "loss": 2.1521248817443848, + "loss_ce": 0.00075742625631392, + "loss_iou": 0.890625, + "loss_num": 0.07421875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 473637088, + "step": 3750 + }, + { + "epoch": 0.9622266401590457, + "grad_norm": 15.118363380432129, + "learning_rate": 5e-06, + "loss": 0.7363, + "num_input_tokens_seen": 473762988, + "step": 3751 + }, + { + "epoch": 0.9622266401590457, + "loss": 0.6468114256858826, + "loss_ce": 0.00032707888749428093, + "loss_iou": 0.3125, + "loss_num": 0.00421142578125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 473762988, + "step": 3751 + }, + { + "epoch": 0.9624831655229911, + "grad_norm": 20.239749908447266, + "learning_rate": 5e-06, + "loss": 0.9043, + "num_input_tokens_seen": 473889364, + "step": 3752 + }, + { + "epoch": 0.9624831655229911, + "loss": 0.7558808326721191, + "loss_ce": 0.0024628937244415283, + "loss_iou": 0.349609375, + "loss_num": 0.01080322265625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 473889364, + "step": 3752 + }, + { + "epoch": 0.9627396908869365, + "grad_norm": 27.54178810119629, + "learning_rate": 5e-06, + "loss": 0.7943, + "num_input_tokens_seen": 474016532, + "step": 3753 + }, + { + "epoch": 0.9627396908869365, + "loss": 0.8111331462860107, + "loss_ce": 0.0013186900177970529, + "loss_iou": 0.376953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 474016532, + "step": 3753 + }, + { + "epoch": 0.9629962162508818, + "grad_norm": 62.48238754272461, + "learning_rate": 5e-06, + "loss": 0.7971, + "num_input_tokens_seen": 474143172, + "step": 3754 + }, + { + "epoch": 0.9629962162508818, + "loss": 0.7653334140777588, + "loss_ce": 0.00019668778986670077, + "loss_iou": 0.359375, + "loss_num": 0.00921630859375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 474143172, + "step": 3754 + }, + { + "epoch": 0.9632527416148272, + "grad_norm": 44.72796630859375, + "learning_rate": 5e-06, + "loss": 0.9834, + "num_input_tokens_seen": 474269056, + "step": 3755 + }, + { + "epoch": 0.9632527416148272, + "loss": 1.1097055673599243, + "loss_ce": 0.0008188991341739893, + "loss_iou": 0.498046875, + "loss_num": 0.0228271484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 474269056, + "step": 3755 + }, + { + "epoch": 0.9635092669787725, + "grad_norm": 21.309350967407227, + "learning_rate": 5e-06, + "loss": 0.9436, + "num_input_tokens_seen": 474395460, + "step": 3756 + }, + { + "epoch": 0.9635092669787725, + "loss": 1.0083272457122803, + "loss_ce": 0.0014913163613528013, + "loss_iou": 0.4609375, + "loss_num": 0.017333984375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 474395460, + "step": 3756 + }, + { + "epoch": 0.9637657923427179, + "grad_norm": 28.973154067993164, + "learning_rate": 5e-06, + "loss": 0.9667, + "num_input_tokens_seen": 474521812, + "step": 3757 + }, + { + "epoch": 0.9637657923427179, + "loss": 0.8583625555038452, + "loss_ce": 0.000940679747145623, + "loss_iou": 0.380859375, + "loss_num": 0.0191650390625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 474521812, + "step": 3757 + }, + { + "epoch": 0.9640223177066632, + "grad_norm": 26.086645126342773, + "learning_rate": 5e-06, + "loss": 0.8433, + "num_input_tokens_seen": 474648048, + "step": 3758 + }, + { + "epoch": 0.9640223177066632, + "loss": 0.8629547357559204, + "loss_ce": 0.00016175792552530766, + "loss_iou": 0.41015625, + "loss_num": 0.0089111328125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 474648048, + "step": 3758 + }, + { + "epoch": 0.9642788430706086, + "grad_norm": 38.165348052978516, + "learning_rate": 5e-06, + "loss": 0.8225, + "num_input_tokens_seen": 474775276, + "step": 3759 + }, + { + "epoch": 0.9642788430706086, + "loss": 0.9540954828262329, + "loss_ce": 0.0029236450791358948, + "loss_iou": 0.421875, + "loss_num": 0.0211181640625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 474775276, + "step": 3759 + }, + { + "epoch": 0.964535368434554, + "grad_norm": 56.49632263183594, + "learning_rate": 5e-06, + "loss": 0.9588, + "num_input_tokens_seen": 474901596, + "step": 3760 + }, + { + "epoch": 0.964535368434554, + "loss": 0.7659876942634583, + "loss_ce": 0.0003626969119068235, + "loss_iou": 0.369140625, + "loss_num": 0.00518798828125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 474901596, + "step": 3760 + }, + { + "epoch": 0.9647918937984993, + "grad_norm": 44.99468994140625, + "learning_rate": 5e-06, + "loss": 1.0001, + "num_input_tokens_seen": 475027836, + "step": 3761 + }, + { + "epoch": 0.9647918937984993, + "loss": 0.8458908796310425, + "loss_ce": 0.00018773060583043844, + "loss_iou": 0.404296875, + "loss_num": 0.006988525390625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 475027836, + "step": 3761 + }, + { + "epoch": 0.9650484191624447, + "grad_norm": 41.79753494262695, + "learning_rate": 5e-06, + "loss": 0.8195, + "num_input_tokens_seen": 475155716, + "step": 3762 + }, + { + "epoch": 0.9650484191624447, + "loss": 0.8121501207351685, + "loss_ce": 0.00013845294597558677, + "loss_iou": 0.380859375, + "loss_num": 0.01031494140625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 475155716, + "step": 3762 + }, + { + "epoch": 0.96530494452639, + "grad_norm": 42.61994171142578, + "learning_rate": 5e-06, + "loss": 0.8966, + "num_input_tokens_seen": 475281868, + "step": 3763 + }, + { + "epoch": 0.96530494452639, + "loss": 0.9151062965393066, + "loss_ce": 0.0029969641473144293, + "loss_iou": 0.423828125, + "loss_num": 0.01251220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 475281868, + "step": 3763 + }, + { + "epoch": 0.9655614698903354, + "grad_norm": 39.85020446777344, + "learning_rate": 5e-06, + "loss": 0.8359, + "num_input_tokens_seen": 475408504, + "step": 3764 + }, + { + "epoch": 0.9655614698903354, + "loss": 0.9258280396461487, + "loss_ce": 0.00029090908356010914, + "loss_iou": 0.435546875, + "loss_num": 0.01068115234375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 475408504, + "step": 3764 + }, + { + "epoch": 0.9658179952542808, + "grad_norm": 29.624691009521484, + "learning_rate": 5e-06, + "loss": 0.9379, + "num_input_tokens_seen": 475532992, + "step": 3765 + }, + { + "epoch": 0.9658179952542808, + "loss": 0.9038563966751099, + "loss_ce": 0.0015126244397833943, + "loss_iou": 0.4140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 475532992, + "step": 3765 + }, + { + "epoch": 0.9660745206182261, + "grad_norm": 29.585498809814453, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 475659856, + "step": 3766 + }, + { + "epoch": 0.9660745206182261, + "loss": 0.9447678327560425, + "loss_ce": 0.0014084293507039547, + "loss_iou": 0.416015625, + "loss_num": 0.0223388671875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 475659856, + "step": 3766 + }, + { + "epoch": 0.9663310459821715, + "grad_norm": 44.51248550415039, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 475785884, + "step": 3767 + }, + { + "epoch": 0.9663310459821715, + "loss": 0.9744595289230347, + "loss_ce": 0.0003384186129551381, + "loss_iou": 0.447265625, + "loss_num": 0.015625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 475785884, + "step": 3767 + }, + { + "epoch": 0.9665875713461168, + "grad_norm": 39.980674743652344, + "learning_rate": 5e-06, + "loss": 0.9248, + "num_input_tokens_seen": 475912860, + "step": 3768 + }, + { + "epoch": 0.9665875713461168, + "loss": 0.8187769055366516, + "loss_ce": 0.0004175486392341554, + "loss_iou": 0.390625, + "loss_num": 0.007080078125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 475912860, + "step": 3768 + }, + { + "epoch": 0.9668440967100622, + "grad_norm": 62.116050720214844, + "learning_rate": 5e-06, + "loss": 0.7571, + "num_input_tokens_seen": 476040564, + "step": 3769 + }, + { + "epoch": 0.9668440967100622, + "loss": 0.8192197680473328, + "loss_ce": 0.0018369618337601423, + "loss_iou": 0.388671875, + "loss_num": 0.00811767578125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 476040564, + "step": 3769 + }, + { + "epoch": 0.9671006220740076, + "grad_norm": 42.37794876098633, + "learning_rate": 5e-06, + "loss": 0.9116, + "num_input_tokens_seen": 476165888, + "step": 3770 + }, + { + "epoch": 0.9671006220740076, + "loss": 0.974213182926178, + "loss_ce": 0.002045197645202279, + "loss_iou": 0.4453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 476165888, + "step": 3770 + }, + { + "epoch": 0.967357147437953, + "grad_norm": 33.12092208862305, + "learning_rate": 5e-06, + "loss": 0.8708, + "num_input_tokens_seen": 476293396, + "step": 3771 + }, + { + "epoch": 0.967357147437953, + "loss": 0.6794819831848145, + "loss_ce": 0.0005269336979836226, + "loss_iou": 0.328125, + "loss_num": 0.00469970703125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 476293396, + "step": 3771 + }, + { + "epoch": 0.9676136728018983, + "grad_norm": 36.52384948730469, + "learning_rate": 5e-06, + "loss": 0.8753, + "num_input_tokens_seen": 476419128, + "step": 3772 + }, + { + "epoch": 0.9676136728018983, + "loss": 0.9944947957992554, + "loss_ce": 0.0008425063570030034, + "loss_iou": 0.4609375, + "loss_num": 0.01416015625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 476419128, + "step": 3772 + }, + { + "epoch": 0.9678701981658436, + "grad_norm": 31.59944725036621, + "learning_rate": 5e-06, + "loss": 0.8248, + "num_input_tokens_seen": 476545764, + "step": 3773 + }, + { + "epoch": 0.9678701981658436, + "loss": 0.763606071472168, + "loss_ce": 0.0004224727163091302, + "loss_iou": 0.361328125, + "loss_num": 0.008056640625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 476545764, + "step": 3773 + }, + { + "epoch": 0.968126723529789, + "grad_norm": 36.53330993652344, + "learning_rate": 5e-06, + "loss": 0.8699, + "num_input_tokens_seen": 476671620, + "step": 3774 + }, + { + "epoch": 0.968126723529789, + "loss": 0.9843180179595947, + "loss_ce": 0.0018961526220664382, + "loss_iou": 0.458984375, + "loss_num": 0.012939453125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 476671620, + "step": 3774 + }, + { + "epoch": 0.9683832488937344, + "grad_norm": 47.60365295410156, + "learning_rate": 5e-06, + "loss": 0.9461, + "num_input_tokens_seen": 476798852, + "step": 3775 + }, + { + "epoch": 0.9683832488937344, + "loss": 0.9560651779174805, + "loss_ce": 0.0019636217039078474, + "loss_iou": 0.439453125, + "loss_num": 0.01470947265625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 476798852, + "step": 3775 + }, + { + "epoch": 0.9686397742576798, + "grad_norm": 68.0597152709961, + "learning_rate": 5e-06, + "loss": 0.9664, + "num_input_tokens_seen": 476926000, + "step": 3776 + }, + { + "epoch": 0.9686397742576798, + "loss": 0.9471697807312012, + "loss_ce": 0.001368994009681046, + "loss_iou": 0.43359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 476926000, + "step": 3776 + }, + { + "epoch": 0.968896299621625, + "grad_norm": 51.54879379272461, + "learning_rate": 5e-06, + "loss": 0.9529, + "num_input_tokens_seen": 477053680, + "step": 3777 + }, + { + "epoch": 0.968896299621625, + "loss": 0.9372760653495789, + "loss_ce": 0.007588581182062626, + "loss_iou": 0.43359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 477053680, + "step": 3777 + }, + { + "epoch": 0.9691528249855704, + "grad_norm": 55.4357795715332, + "learning_rate": 5e-06, + "loss": 0.9686, + "num_input_tokens_seen": 477179636, + "step": 3778 + }, + { + "epoch": 0.9691528249855704, + "loss": 1.04085111618042, + "loss_ce": 0.0003237906494177878, + "loss_iou": 0.4921875, + "loss_num": 0.0115966796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 477179636, + "step": 3778 + }, + { + "epoch": 0.9694093503495158, + "grad_norm": 59.3634147644043, + "learning_rate": 5e-06, + "loss": 0.9863, + "num_input_tokens_seen": 477307344, + "step": 3779 + }, + { + "epoch": 0.9694093503495158, + "loss": 0.8808553218841553, + "loss_ce": 0.0014608247438445687, + "loss_iou": 0.423828125, + "loss_num": 0.0064697265625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 477307344, + "step": 3779 + }, + { + "epoch": 0.9696658757134612, + "grad_norm": 57.58024215698242, + "learning_rate": 5e-06, + "loss": 0.88, + "num_input_tokens_seen": 477435008, + "step": 3780 + }, + { + "epoch": 0.9696658757134612, + "loss": 0.8308746814727783, + "loss_ce": 0.0005524898879230022, + "loss_iou": 0.373046875, + "loss_num": 0.0167236328125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 477435008, + "step": 3780 + }, + { + "epoch": 0.9699224010774066, + "grad_norm": 53.64982604980469, + "learning_rate": 5e-06, + "loss": 0.9129, + "num_input_tokens_seen": 477562544, + "step": 3781 + }, + { + "epoch": 0.9699224010774066, + "loss": 0.9248617887496948, + "loss_ce": 0.0020102285780012608, + "loss_iou": 0.419921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 477562544, + "step": 3781 + }, + { + "epoch": 0.9701789264413518, + "grad_norm": 50.849205017089844, + "learning_rate": 5e-06, + "loss": 1.0309, + "num_input_tokens_seen": 477689252, + "step": 3782 + }, + { + "epoch": 0.9701789264413518, + "loss": 1.0201706886291504, + "loss_ce": 0.0030808113515377045, + "loss_iou": 0.474609375, + "loss_num": 0.01336669921875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 477689252, + "step": 3782 + }, + { + "epoch": 0.9704354518052972, + "grad_norm": 43.21278381347656, + "learning_rate": 5e-06, + "loss": 0.8686, + "num_input_tokens_seen": 477815532, + "step": 3783 + }, + { + "epoch": 0.9704354518052972, + "loss": 0.8473470211029053, + "loss_ce": 0.00017904967535287142, + "loss_iou": 0.39453125, + "loss_num": 0.01177978515625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 477815532, + "step": 3783 + }, + { + "epoch": 0.9706919771692426, + "grad_norm": 42.83711242675781, + "learning_rate": 5e-06, + "loss": 0.8671, + "num_input_tokens_seen": 477942116, + "step": 3784 + }, + { + "epoch": 0.9706919771692426, + "loss": 1.0270847082138062, + "loss_ce": 0.0002292288700118661, + "loss_iou": 0.462890625, + "loss_num": 0.020263671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 477942116, + "step": 3784 + }, + { + "epoch": 0.970948502533188, + "grad_norm": 48.00284194946289, + "learning_rate": 5e-06, + "loss": 0.9883, + "num_input_tokens_seen": 478067464, + "step": 3785 + }, + { + "epoch": 0.970948502533188, + "loss": 0.9106899499893188, + "loss_ce": 0.0007779004517942667, + "loss_iou": 0.421875, + "loss_num": 0.01318359375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 478067464, + "step": 3785 + }, + { + "epoch": 0.9712050278971334, + "grad_norm": 49.067779541015625, + "learning_rate": 5e-06, + "loss": 0.8579, + "num_input_tokens_seen": 478193656, + "step": 3786 + }, + { + "epoch": 0.9712050278971334, + "loss": 0.6648258566856384, + "loss_ce": 0.00027508524362929165, + "loss_iou": 0.3125, + "loss_num": 0.00799560546875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 478193656, + "step": 3786 + }, + { + "epoch": 0.9714615532610786, + "grad_norm": 45.956016540527344, + "learning_rate": 5e-06, + "loss": 0.7353, + "num_input_tokens_seen": 478320828, + "step": 3787 + }, + { + "epoch": 0.9714615532610786, + "loss": 0.8010964393615723, + "loss_ce": 0.0008034704369492829, + "loss_iou": 0.37890625, + "loss_num": 0.00897216796875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 478320828, + "step": 3787 + }, + { + "epoch": 0.971718078625024, + "grad_norm": 49.53134536743164, + "learning_rate": 5e-06, + "loss": 0.9559, + "num_input_tokens_seen": 478448120, + "step": 3788 + }, + { + "epoch": 0.971718078625024, + "loss": 0.8926812410354614, + "loss_ce": 0.0005913989734835923, + "loss_iou": 0.416015625, + "loss_num": 0.01202392578125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 478448120, + "step": 3788 + }, + { + "epoch": 0.9719746039889694, + "grad_norm": 48.36085891723633, + "learning_rate": 5e-06, + "loss": 0.8586, + "num_input_tokens_seen": 478573980, + "step": 3789 + }, + { + "epoch": 0.9719746039889694, + "loss": 0.9248802661895752, + "loss_ce": 0.0005638045840896666, + "loss_iou": 0.4296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 478573980, + "step": 3789 + }, + { + "epoch": 0.9722311293529148, + "grad_norm": 43.8896598815918, + "learning_rate": 5e-06, + "loss": 0.8679, + "num_input_tokens_seen": 478700460, + "step": 3790 + }, + { + "epoch": 0.9722311293529148, + "loss": 1.0170912742614746, + "loss_ce": 0.0007339154835790396, + "loss_iou": 0.4609375, + "loss_num": 0.018798828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 478700460, + "step": 3790 + }, + { + "epoch": 0.9724876547168602, + "grad_norm": 41.30145263671875, + "learning_rate": 5e-06, + "loss": 1.0382, + "num_input_tokens_seen": 478827868, + "step": 3791 + }, + { + "epoch": 0.9724876547168602, + "loss": 1.1369524002075195, + "loss_ce": 0.00047791990800760686, + "loss_iou": 0.515625, + "loss_num": 0.021728515625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 478827868, + "step": 3791 + }, + { + "epoch": 0.9727441800808055, + "grad_norm": 91.9818344116211, + "learning_rate": 5e-06, + "loss": 0.797, + "num_input_tokens_seen": 478954456, + "step": 3792 + }, + { + "epoch": 0.9727441800808055, + "loss": 0.6783241033554077, + "loss_ce": 0.004007694311439991, + "loss_iou": 0.32421875, + "loss_num": 0.005584716796875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 478954456, + "step": 3792 + }, + { + "epoch": 0.9730007054447508, + "grad_norm": 56.80247497558594, + "learning_rate": 5e-06, + "loss": 0.9786, + "num_input_tokens_seen": 479081532, + "step": 3793 + }, + { + "epoch": 0.9730007054447508, + "loss": 0.9465374946594238, + "loss_ce": 0.0017132906941697001, + "loss_iou": 0.44140625, + "loss_num": 0.01190185546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 479081532, + "step": 3793 + }, + { + "epoch": 0.9732572308086962, + "grad_norm": 60.84225082397461, + "learning_rate": 5e-06, + "loss": 0.9072, + "num_input_tokens_seen": 479208724, + "step": 3794 + }, + { + "epoch": 0.9732572308086962, + "loss": 0.7912815809249878, + "loss_ce": 0.002951489295810461, + "loss_iou": 0.361328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 479208724, + "step": 3794 + }, + { + "epoch": 0.9735137561726416, + "grad_norm": 43.424171447753906, + "learning_rate": 5e-06, + "loss": 0.9856, + "num_input_tokens_seen": 479332984, + "step": 3795 + }, + { + "epoch": 0.9735137561726416, + "loss": 0.9396910071372986, + "loss_ce": 0.0012144551146775484, + "loss_iou": 0.4296875, + "loss_num": 0.0157470703125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 479332984, + "step": 3795 + }, + { + "epoch": 0.973770281536587, + "grad_norm": 41.4870719909668, + "learning_rate": 5e-06, + "loss": 0.8905, + "num_input_tokens_seen": 479458000, + "step": 3796 + }, + { + "epoch": 0.973770281536587, + "loss": 0.7291303277015686, + "loss_ce": 0.00037055107532069087, + "loss_iou": 0.34765625, + "loss_num": 0.007049560546875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 479458000, + "step": 3796 + }, + { + "epoch": 0.9740268069005323, + "grad_norm": 40.937034606933594, + "learning_rate": 5e-06, + "loss": 0.8884, + "num_input_tokens_seen": 479585104, + "step": 3797 + }, + { + "epoch": 0.9740268069005323, + "loss": 0.9156482815742493, + "loss_ce": 0.0010975201148539782, + "loss_iou": 0.427734375, + "loss_num": 0.01165771484375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 479585104, + "step": 3797 + }, + { + "epoch": 0.9742833322644776, + "grad_norm": 49.57102966308594, + "learning_rate": 5e-06, + "loss": 0.9635, + "num_input_tokens_seen": 479709600, + "step": 3798 + }, + { + "epoch": 0.9742833322644776, + "loss": 0.8308295607566833, + "loss_ce": 0.0017279739258810878, + "loss_iou": 0.384765625, + "loss_num": 0.01202392578125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 479709600, + "step": 3798 + }, + { + "epoch": 0.974539857628423, + "grad_norm": 35.48270797729492, + "learning_rate": 5e-06, + "loss": 0.8044, + "num_input_tokens_seen": 479834844, + "step": 3799 + }, + { + "epoch": 0.974539857628423, + "loss": 0.7902050614356995, + "loss_ce": 0.0001660330599406734, + "loss_iou": 0.37890625, + "loss_num": 0.006439208984375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 479834844, + "step": 3799 + }, + { + "epoch": 0.9747963829923684, + "grad_norm": 51.63544464111328, + "learning_rate": 5e-06, + "loss": 0.8865, + "num_input_tokens_seen": 479960344, + "step": 3800 + }, + { + "epoch": 0.9747963829923684, + "loss": 0.9673585891723633, + "loss_ce": 0.0005617217975668609, + "loss_iou": 0.42578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 479960344, + "step": 3800 + }, + { + "epoch": 0.9750529083563138, + "grad_norm": 33.3945198059082, + "learning_rate": 5e-06, + "loss": 0.8115, + "num_input_tokens_seen": 480086600, + "step": 3801 + }, + { + "epoch": 0.9750529083563138, + "loss": 0.8440885543823242, + "loss_ce": 0.0018033909145742655, + "loss_iou": 0.38671875, + "loss_num": 0.0135498046875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 480086600, + "step": 3801 + }, + { + "epoch": 0.9753094337202591, + "grad_norm": 26.238550186157227, + "learning_rate": 5e-06, + "loss": 0.8784, + "num_input_tokens_seen": 480212440, + "step": 3802 + }, + { + "epoch": 0.9753094337202591, + "loss": 0.920222282409668, + "loss_ce": 0.00323007651604712, + "loss_iou": 0.42578125, + "loss_num": 0.013671875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 480212440, + "step": 3802 + }, + { + "epoch": 0.9755659590842044, + "grad_norm": 33.98586654663086, + "learning_rate": 5e-06, + "loss": 0.8902, + "num_input_tokens_seen": 480338776, + "step": 3803 + }, + { + "epoch": 0.9755659590842044, + "loss": 0.8389899730682373, + "loss_ce": 0.00036700593773275614, + "loss_iou": 0.392578125, + "loss_num": 0.01116943359375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 480338776, + "step": 3803 + }, + { + "epoch": 0.9758224844481498, + "grad_norm": 37.99490737915039, + "learning_rate": 5e-06, + "loss": 0.961, + "num_input_tokens_seen": 480464764, + "step": 3804 + }, + { + "epoch": 0.9758224844481498, + "loss": 0.8786604404449463, + "loss_ce": 0.0007307725609280169, + "loss_iou": 0.416015625, + "loss_num": 0.00909423828125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 480464764, + "step": 3804 + }, + { + "epoch": 0.9760790098120952, + "grad_norm": 36.646942138671875, + "learning_rate": 5e-06, + "loss": 0.8755, + "num_input_tokens_seen": 480591928, + "step": 3805 + }, + { + "epoch": 0.9760790098120952, + "loss": 0.9064186215400696, + "loss_ce": 0.0016334872925654054, + "loss_iou": 0.40625, + "loss_num": 0.0184326171875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 480591928, + "step": 3805 + }, + { + "epoch": 0.9763355351760405, + "grad_norm": 45.871238708496094, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 480719264, + "step": 3806 + }, + { + "epoch": 0.9763355351760405, + "loss": 0.9059736728668213, + "loss_ce": 0.0007002401980571449, + "loss_iou": 0.419921875, + "loss_num": 0.01287841796875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 480719264, + "step": 3806 + }, + { + "epoch": 0.9765920605399859, + "grad_norm": 34.62343215942383, + "learning_rate": 5e-06, + "loss": 0.9901, + "num_input_tokens_seen": 480845744, + "step": 3807 + }, + { + "epoch": 0.9765920605399859, + "loss": 0.8348743915557861, + "loss_ce": 0.0013782616006210446, + "loss_iou": 0.38671875, + "loss_num": 0.01220703125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 480845744, + "step": 3807 + }, + { + "epoch": 0.9768485859039312, + "grad_norm": 22.532493591308594, + "learning_rate": 5e-06, + "loss": 0.8511, + "num_input_tokens_seen": 480973392, + "step": 3808 + }, + { + "epoch": 0.9768485859039312, + "loss": 0.8953410387039185, + "loss_ce": 0.0003215124597772956, + "loss_iou": 0.408203125, + "loss_num": 0.015869140625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 480973392, + "step": 3808 + }, + { + "epoch": 0.9771051112678766, + "grad_norm": 31.452713012695312, + "learning_rate": 5e-06, + "loss": 0.8775, + "num_input_tokens_seen": 481099708, + "step": 3809 + }, + { + "epoch": 0.9771051112678766, + "loss": 0.7500057816505432, + "loss_ce": 0.0002499092370271683, + "loss_iou": 0.357421875, + "loss_num": 0.0072021484375, + "loss_xval": 0.75, + "num_input_tokens_seen": 481099708, + "step": 3809 + }, + { + "epoch": 0.977361636631822, + "grad_norm": 34.24294662475586, + "learning_rate": 5e-06, + "loss": 0.8045, + "num_input_tokens_seen": 481225944, + "step": 3810 + }, + { + "epoch": 0.977361636631822, + "loss": 0.9247161149978638, + "loss_ce": 0.0003997244348283857, + "loss_iou": 0.419921875, + "loss_num": 0.016845703125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 481225944, + "step": 3810 + }, + { + "epoch": 0.9776181619957673, + "grad_norm": 60.927921295166016, + "learning_rate": 5e-06, + "loss": 0.9146, + "num_input_tokens_seen": 481351928, + "step": 3811 + }, + { + "epoch": 0.9776181619957673, + "loss": 1.0008454322814941, + "loss_ce": 0.0013337605632841587, + "loss_iou": 0.458984375, + "loss_num": 0.016357421875, + "loss_xval": 1.0, + "num_input_tokens_seen": 481351928, + "step": 3811 + }, + { + "epoch": 0.9778746873597127, + "grad_norm": 38.550018310546875, + "learning_rate": 5e-06, + "loss": 1.0196, + "num_input_tokens_seen": 481478664, + "step": 3812 + }, + { + "epoch": 0.9778746873597127, + "loss": 1.272307276725769, + "loss_ce": 0.0027760120574384928, + "loss_iou": 0.55078125, + "loss_num": 0.033447265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 481478664, + "step": 3812 + }, + { + "epoch": 0.9781312127236581, + "grad_norm": 58.268218994140625, + "learning_rate": 5e-06, + "loss": 0.8302, + "num_input_tokens_seen": 481606304, + "step": 3813 + }, + { + "epoch": 0.9781312127236581, + "loss": 0.6138761639595032, + "loss_ce": 0.002059745602309704, + "loss_iou": 0.294921875, + "loss_num": 0.00457763671875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 481606304, + "step": 3813 + }, + { + "epoch": 0.9783877380876034, + "grad_norm": 45.937530517578125, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 481732396, + "step": 3814 + }, + { + "epoch": 0.9783877380876034, + "loss": 0.8867267370223999, + "loss_ce": 0.000496279972139746, + "loss_iou": 0.41015625, + "loss_num": 0.012939453125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 481732396, + "step": 3814 + }, + { + "epoch": 0.9786442634515488, + "grad_norm": 24.20002555847168, + "learning_rate": 5e-06, + "loss": 0.8443, + "num_input_tokens_seen": 481859316, + "step": 3815 + }, + { + "epoch": 0.9786442634515488, + "loss": 0.6843119859695435, + "loss_ce": 0.0016948458505794406, + "loss_iou": 0.322265625, + "loss_num": 0.0076904296875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 481859316, + "step": 3815 + }, + { + "epoch": 0.9789007888154941, + "grad_norm": 38.585540771484375, + "learning_rate": 5e-06, + "loss": 0.8715, + "num_input_tokens_seen": 481986556, + "step": 3816 + }, + { + "epoch": 0.9789007888154941, + "loss": 0.7374401092529297, + "loss_ce": 0.0008678692393004894, + "loss_iou": 0.34765625, + "loss_num": 0.00836181640625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 481986556, + "step": 3816 + }, + { + "epoch": 0.9791573141794395, + "grad_norm": 76.50428771972656, + "learning_rate": 5e-06, + "loss": 0.9653, + "num_input_tokens_seen": 482114576, + "step": 3817 + }, + { + "epoch": 0.9791573141794395, + "loss": 1.13668954372406, + "loss_ce": 0.001435635145753622, + "loss_iou": 0.5078125, + "loss_num": 0.0234375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 482114576, + "step": 3817 + }, + { + "epoch": 0.9794138395433849, + "grad_norm": 53.28171157836914, + "learning_rate": 5e-06, + "loss": 1.0295, + "num_input_tokens_seen": 482239756, + "step": 3818 + }, + { + "epoch": 0.9794138395433849, + "loss": 1.114926815032959, + "loss_ce": 0.00018064792675431818, + "loss_iou": 0.515625, + "loss_num": 0.015869140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 482239756, + "step": 3818 + }, + { + "epoch": 0.9796703649073302, + "grad_norm": 33.97561264038086, + "learning_rate": 5e-06, + "loss": 0.8223, + "num_input_tokens_seen": 482367492, + "step": 3819 + }, + { + "epoch": 0.9796703649073302, + "loss": 0.7915701866149902, + "loss_ce": 0.0005545966560021043, + "loss_iou": 0.37109375, + "loss_num": 0.01025390625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 482367492, + "step": 3819 + }, + { + "epoch": 0.9799268902712756, + "grad_norm": 25.779245376586914, + "learning_rate": 5e-06, + "loss": 0.8625, + "num_input_tokens_seen": 482494444, + "step": 3820 + }, + { + "epoch": 0.9799268902712756, + "loss": 0.7230817675590515, + "loss_ce": 0.0006696414202451706, + "loss_iou": 0.341796875, + "loss_num": 0.00787353515625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 482494444, + "step": 3820 + }, + { + "epoch": 0.9801834156352209, + "grad_norm": 33.87136459350586, + "learning_rate": 5e-06, + "loss": 0.9434, + "num_input_tokens_seen": 482620392, + "step": 3821 + }, + { + "epoch": 0.9801834156352209, + "loss": 0.710566520690918, + "loss_ce": 0.0008497126400470734, + "loss_iou": 0.3359375, + "loss_num": 0.0072021484375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 482620392, + "step": 3821 + }, + { + "epoch": 0.9804399409991663, + "grad_norm": 45.86960220336914, + "learning_rate": 5e-06, + "loss": 0.8085, + "num_input_tokens_seen": 482746868, + "step": 3822 + }, + { + "epoch": 0.9804399409991663, + "loss": 0.9133434295654297, + "loss_ce": 0.003675471991300583, + "loss_iou": 0.421875, + "loss_num": 0.0133056640625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 482746868, + "step": 3822 + }, + { + "epoch": 0.9806964663631117, + "grad_norm": 52.059181213378906, + "learning_rate": 5e-06, + "loss": 0.8824, + "num_input_tokens_seen": 482873396, + "step": 3823 + }, + { + "epoch": 0.9806964663631117, + "loss": 0.67775559425354, + "loss_ce": 0.001974322134628892, + "loss_iou": 0.326171875, + "loss_num": 0.004425048828125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 482873396, + "step": 3823 + }, + { + "epoch": 0.980952991727057, + "grad_norm": 55.782676696777344, + "learning_rate": 5e-06, + "loss": 0.8906, + "num_input_tokens_seen": 482998752, + "step": 3824 + }, + { + "epoch": 0.980952991727057, + "loss": 0.9578033685684204, + "loss_ce": 0.0002837996289599687, + "loss_iou": 0.443359375, + "loss_num": 0.01385498046875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 482998752, + "step": 3824 + }, + { + "epoch": 0.9812095170910023, + "grad_norm": 58.48487091064453, + "learning_rate": 5e-06, + "loss": 0.9513, + "num_input_tokens_seen": 483124796, + "step": 3825 + }, + { + "epoch": 0.9812095170910023, + "loss": 0.8835445046424866, + "loss_ce": 0.0004879114276263863, + "loss_iou": 0.412109375, + "loss_num": 0.011962890625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 483124796, + "step": 3825 + }, + { + "epoch": 0.9814660424549477, + "grad_norm": 38.821388244628906, + "learning_rate": 5e-06, + "loss": 0.929, + "num_input_tokens_seen": 483250352, + "step": 3826 + }, + { + "epoch": 0.9814660424549477, + "loss": 0.9396919012069702, + "loss_ce": 0.0026802178472280502, + "loss_iou": 0.41796875, + "loss_num": 0.0206298828125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 483250352, + "step": 3826 + }, + { + "epoch": 0.9817225678188931, + "grad_norm": 40.44762420654297, + "learning_rate": 5e-06, + "loss": 0.9569, + "num_input_tokens_seen": 483376500, + "step": 3827 + }, + { + "epoch": 0.9817225678188931, + "loss": 0.8776958584785461, + "loss_ce": 0.00025444256607443094, + "loss_iou": 0.4140625, + "loss_num": 0.00970458984375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 483376500, + "step": 3827 + }, + { + "epoch": 0.9819790931828385, + "grad_norm": 44.034725189208984, + "learning_rate": 5e-06, + "loss": 0.8453, + "num_input_tokens_seen": 483502240, + "step": 3828 + }, + { + "epoch": 0.9819790931828385, + "loss": 0.9015566110610962, + "loss_ce": 0.0014101271517574787, + "loss_iou": 0.404296875, + "loss_num": 0.0185546875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 483502240, + "step": 3828 + }, + { + "epoch": 0.9822356185467838, + "grad_norm": 43.429622650146484, + "learning_rate": 5e-06, + "loss": 0.9798, + "num_input_tokens_seen": 483628696, + "step": 3829 + }, + { + "epoch": 0.9822356185467838, + "loss": 1.0202128887176514, + "loss_ce": 0.001169844064861536, + "loss_iou": 0.4609375, + "loss_num": 0.019287109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 483628696, + "step": 3829 + }, + { + "epoch": 0.9824921439107291, + "grad_norm": 58.97622299194336, + "learning_rate": 5e-06, + "loss": 0.8466, + "num_input_tokens_seen": 483755516, + "step": 3830 + }, + { + "epoch": 0.9824921439107291, + "loss": 0.7860400676727295, + "loss_ce": 0.0013721315190196037, + "loss_iou": 0.37109375, + "loss_num": 0.00885009765625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 483755516, + "step": 3830 + }, + { + "epoch": 0.9827486692746745, + "grad_norm": 47.82119369506836, + "learning_rate": 5e-06, + "loss": 1.0832, + "num_input_tokens_seen": 483881776, + "step": 3831 + }, + { + "epoch": 0.9827486692746745, + "loss": 0.8283027410507202, + "loss_ce": 0.0001777127181412652, + "loss_iou": 0.388671875, + "loss_num": 0.00982666015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 483881776, + "step": 3831 + }, + { + "epoch": 0.9830051946386199, + "grad_norm": 56.91780090332031, + "learning_rate": 5e-06, + "loss": 1.028, + "num_input_tokens_seen": 484007524, + "step": 3832 + }, + { + "epoch": 0.9830051946386199, + "loss": 1.019424557685852, + "loss_ce": 0.0003816070966422558, + "loss_iou": 0.470703125, + "loss_num": 0.01513671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 484007524, + "step": 3832 + }, + { + "epoch": 0.9832617200025653, + "grad_norm": 42.00761032104492, + "learning_rate": 5e-06, + "loss": 0.9839, + "num_input_tokens_seen": 484133284, + "step": 3833 + }, + { + "epoch": 0.9832617200025653, + "loss": 1.0506396293640137, + "loss_ce": 0.0008349920390173793, + "loss_iou": 0.462890625, + "loss_num": 0.0245361328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 484133284, + "step": 3833 + }, + { + "epoch": 0.9835182453665107, + "grad_norm": 47.29082489013672, + "learning_rate": 5e-06, + "loss": 0.9595, + "num_input_tokens_seen": 484258884, + "step": 3834 + }, + { + "epoch": 0.9835182453665107, + "loss": 1.064565658569336, + "loss_ce": 0.00035663513699546456, + "loss_iou": 0.484375, + "loss_num": 0.0189208984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 484258884, + "step": 3834 + }, + { + "epoch": 0.9837747707304559, + "grad_norm": 51.915714263916016, + "learning_rate": 5e-06, + "loss": 1.0339, + "num_input_tokens_seen": 484385300, + "step": 3835 + }, + { + "epoch": 0.9837747707304559, + "loss": 1.1462863683700562, + "loss_ce": 0.001266773440875113, + "loss_iou": 0.51953125, + "loss_num": 0.0203857421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 484385300, + "step": 3835 + }, + { + "epoch": 0.9840312960944013, + "grad_norm": 57.738651275634766, + "learning_rate": 5e-06, + "loss": 1.0651, + "num_input_tokens_seen": 484511956, + "step": 3836 + }, + { + "epoch": 0.9840312960944013, + "loss": 1.238745093345642, + "loss_ce": 0.000463884964119643, + "loss_iou": 0.5234375, + "loss_num": 0.0380859375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 484511956, + "step": 3836 + }, + { + "epoch": 0.9842878214583467, + "grad_norm": 52.526912689208984, + "learning_rate": 5e-06, + "loss": 0.8613, + "num_input_tokens_seen": 484638672, + "step": 3837 + }, + { + "epoch": 0.9842878214583467, + "loss": 0.7705883979797363, + "loss_ce": 0.0020336683373898268, + "loss_iou": 0.361328125, + "loss_num": 0.0091552734375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 484638672, + "step": 3837 + }, + { + "epoch": 0.9845443468222921, + "grad_norm": 41.436824798583984, + "learning_rate": 5e-06, + "loss": 0.8485, + "num_input_tokens_seen": 484764452, + "step": 3838 + }, + { + "epoch": 0.9845443468222921, + "loss": 0.7732859253883362, + "loss_ce": 0.0008250030805356801, + "loss_iou": 0.3671875, + "loss_num": 0.0076904296875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 484764452, + "step": 3838 + }, + { + "epoch": 0.9848008721862375, + "grad_norm": 46.6375846862793, + "learning_rate": 5e-06, + "loss": 0.8306, + "num_input_tokens_seen": 484890668, + "step": 3839 + }, + { + "epoch": 0.9848008721862375, + "loss": 0.7089645862579346, + "loss_ce": 0.0007126600830815732, + "loss_iou": 0.33203125, + "loss_num": 0.0086669921875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 484890668, + "step": 3839 + }, + { + "epoch": 0.9850573975501827, + "grad_norm": 65.32672882080078, + "learning_rate": 5e-06, + "loss": 0.9597, + "num_input_tokens_seen": 485017544, + "step": 3840 + }, + { + "epoch": 0.9850573975501827, + "loss": 0.8571253418922424, + "loss_ce": 0.001656589680351317, + "loss_iou": 0.40625, + "loss_num": 0.0089111328125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 485017544, + "step": 3840 + }, + { + "epoch": 0.9853139229141281, + "grad_norm": 49.08979797363281, + "learning_rate": 5e-06, + "loss": 0.9914, + "num_input_tokens_seen": 485143292, + "step": 3841 + }, + { + "epoch": 0.9853139229141281, + "loss": 1.0471961498260498, + "loss_ce": 0.0003211740404367447, + "loss_iou": 0.47265625, + "loss_num": 0.0201416015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 485143292, + "step": 3841 + }, + { + "epoch": 0.9855704482780735, + "grad_norm": 42.99983596801758, + "learning_rate": 5e-06, + "loss": 0.8349, + "num_input_tokens_seen": 485268908, + "step": 3842 + }, + { + "epoch": 0.9855704482780735, + "loss": 0.8543634414672852, + "loss_ce": 0.0023126155138015747, + "loss_iou": 0.390625, + "loss_num": 0.0142822265625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 485268908, + "step": 3842 + }, + { + "epoch": 0.9858269736420189, + "grad_norm": 39.54624557495117, + "learning_rate": 5e-06, + "loss": 0.8593, + "num_input_tokens_seen": 485396664, + "step": 3843 + }, + { + "epoch": 0.9858269736420189, + "loss": 0.709017276763916, + "loss_ce": 0.0002770793507806957, + "loss_iou": 0.328125, + "loss_num": 0.01019287109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 485396664, + "step": 3843 + }, + { + "epoch": 0.9860834990059643, + "grad_norm": 45.982295989990234, + "learning_rate": 5e-06, + "loss": 0.8227, + "num_input_tokens_seen": 485522904, + "step": 3844 + }, + { + "epoch": 0.9860834990059643, + "loss": 0.7737441062927246, + "loss_ce": 0.0007948796264827251, + "loss_iou": 0.373046875, + "loss_num": 0.005340576171875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 485522904, + "step": 3844 + }, + { + "epoch": 0.9863400243699095, + "grad_norm": 44.989044189453125, + "learning_rate": 5e-06, + "loss": 0.9239, + "num_input_tokens_seen": 485649128, + "step": 3845 + }, + { + "epoch": 0.9863400243699095, + "loss": 0.8725839257240295, + "loss_ce": 0.0002694434951990843, + "loss_iou": 0.39453125, + "loss_num": 0.0166015625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 485649128, + "step": 3845 + }, + { + "epoch": 0.9865965497338549, + "grad_norm": 34.919471740722656, + "learning_rate": 5e-06, + "loss": 0.8379, + "num_input_tokens_seen": 485774824, + "step": 3846 + }, + { + "epoch": 0.9865965497338549, + "loss": 0.6525382995605469, + "loss_ce": 0.0009269894217140973, + "loss_iou": 0.30859375, + "loss_num": 0.0067138671875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 485774824, + "step": 3846 + }, + { + "epoch": 0.9868530750978003, + "grad_norm": 41.9149169921875, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 485901576, + "step": 3847 + }, + { + "epoch": 0.9868530750978003, + "loss": 0.8874931335449219, + "loss_ce": 0.004436480347067118, + "loss_iou": 0.3984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 485901576, + "step": 3847 + }, + { + "epoch": 0.9871096004617457, + "grad_norm": 54.38967514038086, + "learning_rate": 5e-06, + "loss": 0.8901, + "num_input_tokens_seen": 486028844, + "step": 3848 + }, + { + "epoch": 0.9871096004617457, + "loss": 0.8593472242355347, + "loss_ce": 0.0016811818350106478, + "loss_iou": 0.390625, + "loss_num": 0.015625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 486028844, + "step": 3848 + }, + { + "epoch": 0.987366125825691, + "grad_norm": 44.892845153808594, + "learning_rate": 5e-06, + "loss": 0.9428, + "num_input_tokens_seen": 486154200, + "step": 3849 + }, + { + "epoch": 0.987366125825691, + "loss": 1.0272352695465088, + "loss_ce": 0.0008680171449668705, + "loss_iou": 0.478515625, + "loss_num": 0.01409912109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 486154200, + "step": 3849 + }, + { + "epoch": 0.9876226511896363, + "grad_norm": 110.74919128417969, + "learning_rate": 5e-06, + "loss": 0.8548, + "num_input_tokens_seen": 486280412, + "step": 3850 + }, + { + "epoch": 0.9876226511896363, + "loss": 0.6786231994628906, + "loss_ce": 0.0006446837214753032, + "loss_iou": 0.318359375, + "loss_num": 0.008544921875, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 486280412, + "step": 3850 + }, + { + "epoch": 0.9878791765535817, + "grad_norm": 44.517276763916016, + "learning_rate": 5e-06, + "loss": 0.855, + "num_input_tokens_seen": 486406940, + "step": 3851 + }, + { + "epoch": 0.9878791765535817, + "loss": 0.716771125793457, + "loss_ce": 0.0019273933721706271, + "loss_iou": 0.34765625, + "loss_num": 0.003814697265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 486406940, + "step": 3851 + }, + { + "epoch": 0.9881357019175271, + "grad_norm": 41.114166259765625, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 486532732, + "step": 3852 + }, + { + "epoch": 0.9881357019175271, + "loss": 0.9515881538391113, + "loss_ce": 0.0013928130501881242, + "loss_iou": 0.4296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 486532732, + "step": 3852 + }, + { + "epoch": 0.9883922272814725, + "grad_norm": 69.42011260986328, + "learning_rate": 5e-06, + "loss": 0.9071, + "num_input_tokens_seen": 486658708, + "step": 3853 + }, + { + "epoch": 0.9883922272814725, + "loss": 0.8449473977088928, + "loss_ce": 0.003150523640215397, + "loss_iou": 0.37890625, + "loss_num": 0.01708984375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 486658708, + "step": 3853 + }, + { + "epoch": 0.9886487526454178, + "grad_norm": 33.0396842956543, + "learning_rate": 5e-06, + "loss": 0.8771, + "num_input_tokens_seen": 486785888, + "step": 3854 + }, + { + "epoch": 0.9886487526454178, + "loss": 1.144322395324707, + "loss_ce": 0.005162164103239775, + "loss_iou": 0.5078125, + "loss_num": 0.025390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 486785888, + "step": 3854 + }, + { + "epoch": 0.9889052780093632, + "grad_norm": 44.102783203125, + "learning_rate": 5e-06, + "loss": 0.9777, + "num_input_tokens_seen": 486911924, + "step": 3855 + }, + { + "epoch": 0.9889052780093632, + "loss": 1.0512350797653198, + "loss_ce": 0.00020970971672795713, + "loss_iou": 0.490234375, + "loss_num": 0.01373291015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 486911924, + "step": 3855 + }, + { + "epoch": 0.9891618033733085, + "grad_norm": 45.1164665222168, + "learning_rate": 5e-06, + "loss": 0.8995, + "num_input_tokens_seen": 487039800, + "step": 3856 + }, + { + "epoch": 0.9891618033733085, + "loss": 0.8230364322662354, + "loss_ce": 0.0017473774496465921, + "loss_iou": 0.376953125, + "loss_num": 0.013427734375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 487039800, + "step": 3856 + }, + { + "epoch": 0.9894183287372539, + "grad_norm": 30.186452865600586, + "learning_rate": 5e-06, + "loss": 0.9116, + "num_input_tokens_seen": 487164904, + "step": 3857 + }, + { + "epoch": 0.9894183287372539, + "loss": 0.9186214804649353, + "loss_ce": 0.0011410375591367483, + "loss_iou": 0.419921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 487164904, + "step": 3857 + }, + { + "epoch": 0.9896748541011993, + "grad_norm": 43.00091552734375, + "learning_rate": 5e-06, + "loss": 0.8135, + "num_input_tokens_seen": 487291552, + "step": 3858 + }, + { + "epoch": 0.9896748541011993, + "loss": 0.942298412322998, + "loss_ce": 0.0033335976768285036, + "loss_iou": 0.43359375, + "loss_num": 0.0146484375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 487291552, + "step": 3858 + }, + { + "epoch": 0.9899313794651446, + "grad_norm": 42.9818229675293, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 487418004, + "step": 3859 + }, + { + "epoch": 0.9899313794651446, + "loss": 0.8520383238792419, + "loss_ce": 0.0029172429349273443, + "loss_iou": 0.390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 487418004, + "step": 3859 + }, + { + "epoch": 0.99018790482909, + "grad_norm": 47.033870697021484, + "learning_rate": 5e-06, + "loss": 0.7566, + "num_input_tokens_seen": 487544640, + "step": 3860 + }, + { + "epoch": 0.99018790482909, + "loss": 0.7272671461105347, + "loss_ce": 0.0011929069878533483, + "loss_iou": 0.345703125, + "loss_num": 0.007110595703125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 487544640, + "step": 3860 + }, + { + "epoch": 0.9904444301930353, + "grad_norm": 46.99728775024414, + "learning_rate": 5e-06, + "loss": 0.8838, + "num_input_tokens_seen": 487670560, + "step": 3861 + }, + { + "epoch": 0.9904444301930353, + "loss": 0.8590025901794434, + "loss_ce": 0.0015807045856490731, + "loss_iou": 0.392578125, + "loss_num": 0.01422119140625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 487670560, + "step": 3861 + }, + { + "epoch": 0.9907009555569807, + "grad_norm": 37.164066314697266, + "learning_rate": 5e-06, + "loss": 0.9017, + "num_input_tokens_seen": 487796568, + "step": 3862 + }, + { + "epoch": 0.9907009555569807, + "loss": 0.9254218339920044, + "loss_ce": 0.0011054262286052108, + "loss_iou": 0.4375, + "loss_num": 0.01019287109375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 487796568, + "step": 3862 + }, + { + "epoch": 0.9909574809209261, + "grad_norm": 41.15232849121094, + "learning_rate": 5e-06, + "loss": 0.8265, + "num_input_tokens_seen": 487921516, + "step": 3863 + }, + { + "epoch": 0.9909574809209261, + "loss": 0.8543081283569336, + "loss_ce": 0.00030424704891629517, + "loss_iou": 0.3984375, + "loss_num": 0.01153564453125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 487921516, + "step": 3863 + }, + { + "epoch": 0.9912140062848714, + "grad_norm": 41.36125564575195, + "learning_rate": 5e-06, + "loss": 0.8359, + "num_input_tokens_seen": 488047772, + "step": 3864 + }, + { + "epoch": 0.9912140062848714, + "loss": 0.7927870750427246, + "loss_ce": 0.0034804188180714846, + "loss_iou": 0.37109375, + "loss_num": 0.009521484375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 488047772, + "step": 3864 + }, + { + "epoch": 0.9914705316488168, + "grad_norm": 47.32115936279297, + "learning_rate": 5e-06, + "loss": 0.8506, + "num_input_tokens_seen": 488174352, + "step": 3865 + }, + { + "epoch": 0.9914705316488168, + "loss": 0.7862775921821594, + "loss_ce": 0.00014478995581157506, + "loss_iou": 0.3671875, + "loss_num": 0.0101318359375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 488174352, + "step": 3865 + }, + { + "epoch": 0.9917270570127621, + "grad_norm": 68.58698272705078, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 488301608, + "step": 3866 + }, + { + "epoch": 0.9917270570127621, + "loss": 0.8053591251373291, + "loss_ce": 0.0016482011415064335, + "loss_iou": 0.37109375, + "loss_num": 0.0123291015625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 488301608, + "step": 3866 + }, + { + "epoch": 0.9919835823767075, + "grad_norm": 58.97636795043945, + "learning_rate": 5e-06, + "loss": 1.0394, + "num_input_tokens_seen": 488428752, + "step": 3867 + }, + { + "epoch": 0.9919835823767075, + "loss": 1.04227614402771, + "loss_ce": 0.004190301522612572, + "loss_iou": 0.46875, + "loss_num": 0.02001953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 488428752, + "step": 3867 + }, + { + "epoch": 0.9922401077406529, + "grad_norm": 23.901742935180664, + "learning_rate": 5e-06, + "loss": 0.996, + "num_input_tokens_seen": 488554324, + "step": 3868 + }, + { + "epoch": 0.9922401077406529, + "loss": 0.9241127967834473, + "loss_ce": 0.0005288515239953995, + "loss_iou": 0.431640625, + "loss_num": 0.01171875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 488554324, + "step": 3868 + }, + { + "epoch": 0.9924966331045982, + "grad_norm": 28.20534324645996, + "learning_rate": 5e-06, + "loss": 0.9904, + "num_input_tokens_seen": 488680132, + "step": 3869 + }, + { + "epoch": 0.9924966331045982, + "loss": 0.8508338928222656, + "loss_ce": 0.0017128018662333488, + "loss_iou": 0.3828125, + "loss_num": 0.0166015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 488680132, + "step": 3869 + }, + { + "epoch": 0.9927531584685436, + "grad_norm": 45.32927703857422, + "learning_rate": 5e-06, + "loss": 0.829, + "num_input_tokens_seen": 488805764, + "step": 3870 + }, + { + "epoch": 0.9927531584685436, + "loss": 0.83472740650177, + "loss_ce": 0.0002547111944295466, + "loss_iou": 0.392578125, + "loss_num": 0.0098876953125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 488805764, + "step": 3870 + }, + { + "epoch": 0.9930096838324889, + "grad_norm": 52.35927200317383, + "learning_rate": 5e-06, + "loss": 0.8366, + "num_input_tokens_seen": 488932204, + "step": 3871 + }, + { + "epoch": 0.9930096838324889, + "loss": 0.7943147420883179, + "loss_ce": 0.0018342548282817006, + "loss_iou": 0.373046875, + "loss_num": 0.009033203125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 488932204, + "step": 3871 + }, + { + "epoch": 0.9932662091964343, + "grad_norm": 53.98024368286133, + "learning_rate": 5e-06, + "loss": 0.866, + "num_input_tokens_seen": 489059024, + "step": 3872 + }, + { + "epoch": 0.9932662091964343, + "loss": 1.0288567543029785, + "loss_ce": 0.0005363976815715432, + "loss_iou": 0.466796875, + "loss_num": 0.0189208984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 489059024, + "step": 3872 + }, + { + "epoch": 0.9935227345603797, + "grad_norm": 40.535011291503906, + "learning_rate": 5e-06, + "loss": 0.9398, + "num_input_tokens_seen": 489184604, + "step": 3873 + }, + { + "epoch": 0.9935227345603797, + "loss": 0.9352429509162903, + "loss_ce": 0.0016492563299834728, + "loss_iou": 0.423828125, + "loss_num": 0.0169677734375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 489184604, + "step": 3873 + }, + { + "epoch": 0.993779259924325, + "grad_norm": 32.17060470581055, + "learning_rate": 5e-06, + "loss": 0.9241, + "num_input_tokens_seen": 489311748, + "step": 3874 + }, + { + "epoch": 0.993779259924325, + "loss": 0.7515289783477783, + "loss_ce": 0.0003082709154114127, + "loss_iou": 0.357421875, + "loss_num": 0.007293701171875, + "loss_xval": 0.75, + "num_input_tokens_seen": 489311748, + "step": 3874 + }, + { + "epoch": 0.9940357852882704, + "grad_norm": 62.315616607666016, + "learning_rate": 5e-06, + "loss": 0.8974, + "num_input_tokens_seen": 489438196, + "step": 3875 + }, + { + "epoch": 0.9940357852882704, + "loss": 0.764228880405426, + "loss_ce": 0.0027542519383132458, + "loss_iou": 0.35546875, + "loss_num": 0.010498046875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 489438196, + "step": 3875 + }, + { + "epoch": 0.9942923106522157, + "grad_norm": 60.7896728515625, + "learning_rate": 5e-06, + "loss": 0.8173, + "num_input_tokens_seen": 489565564, + "step": 3876 + }, + { + "epoch": 0.9942923106522157, + "loss": 0.7176902890205383, + "loss_ce": 0.00040516071021556854, + "loss_iou": 0.33984375, + "loss_num": 0.007415771484375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 489565564, + "step": 3876 + }, + { + "epoch": 0.9945488360161611, + "grad_norm": 43.65571975708008, + "learning_rate": 5e-06, + "loss": 0.8637, + "num_input_tokens_seen": 489691324, + "step": 3877 + }, + { + "epoch": 0.9945488360161611, + "loss": 0.8290260434150696, + "loss_ce": 0.003586562117561698, + "loss_iou": 0.390625, + "loss_num": 0.0089111328125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 489691324, + "step": 3877 + }, + { + "epoch": 0.9948053613801064, + "grad_norm": 41.68125534057617, + "learning_rate": 5e-06, + "loss": 0.8428, + "num_input_tokens_seen": 489817916, + "step": 3878 + }, + { + "epoch": 0.9948053613801064, + "loss": 0.913058340549469, + "loss_ce": 0.0004606684669852257, + "loss_iou": 0.421875, + "loss_num": 0.01416015625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 489817916, + "step": 3878 + }, + { + "epoch": 0.9950618867440518, + "grad_norm": 44.369529724121094, + "learning_rate": 5e-06, + "loss": 0.8854, + "num_input_tokens_seen": 489943964, + "step": 3879 + }, + { + "epoch": 0.9950618867440518, + "loss": 0.8251513242721558, + "loss_ce": 0.00044425539090298116, + "loss_iou": 0.392578125, + "loss_num": 0.0081787109375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 489943964, + "step": 3879 + }, + { + "epoch": 0.9953184121079972, + "grad_norm": 29.838436126708984, + "learning_rate": 5e-06, + "loss": 0.8636, + "num_input_tokens_seen": 490071032, + "step": 3880 + }, + { + "epoch": 0.9953184121079972, + "loss": 0.787615180015564, + "loss_ce": 0.0012381981359794736, + "loss_iou": 0.37890625, + "loss_num": 0.00592041015625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 490071032, + "step": 3880 + }, + { + "epoch": 0.9955749374719426, + "grad_norm": 37.56049346923828, + "learning_rate": 5e-06, + "loss": 0.8708, + "num_input_tokens_seen": 490196236, + "step": 3881 + }, + { + "epoch": 0.9955749374719426, + "loss": 0.8166136741638184, + "loss_ce": 0.0006956889992579818, + "loss_iou": 0.3828125, + "loss_num": 0.0101318359375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 490196236, + "step": 3881 + }, + { + "epoch": 0.9958314628358879, + "grad_norm": 55.184505462646484, + "learning_rate": 5e-06, + "loss": 0.9222, + "num_input_tokens_seen": 490323220, + "step": 3882 + }, + { + "epoch": 0.9958314628358879, + "loss": 0.8866286873817444, + "loss_ce": 0.0003982450580224395, + "loss_iou": 0.396484375, + "loss_num": 0.0184326171875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 490323220, + "step": 3882 + }, + { + "epoch": 0.9960879881998332, + "grad_norm": 58.26996612548828, + "learning_rate": 5e-06, + "loss": 0.9311, + "num_input_tokens_seen": 490449524, + "step": 3883 + }, + { + "epoch": 0.9960879881998332, + "loss": 0.9798160791397095, + "loss_ce": 0.0013004731154069304, + "loss_iou": 0.4609375, + "loss_num": 0.01123046875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 490449524, + "step": 3883 + }, + { + "epoch": 0.9963445135637786, + "grad_norm": 32.543800354003906, + "learning_rate": 5e-06, + "loss": 0.8101, + "num_input_tokens_seen": 490576952, + "step": 3884 + }, + { + "epoch": 0.9963445135637786, + "loss": 0.9826844930648804, + "loss_ce": 0.0002625897468533367, + "loss_iou": 0.470703125, + "loss_num": 0.008544921875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 490576952, + "step": 3884 + }, + { + "epoch": 0.996601038927724, + "grad_norm": 107.5548324584961, + "learning_rate": 5e-06, + "loss": 0.8959, + "num_input_tokens_seen": 490702824, + "step": 3885 + }, + { + "epoch": 0.996601038927724, + "loss": 1.0382554531097412, + "loss_ce": 0.001146082067862153, + "loss_iou": 0.47265625, + "loss_num": 0.01806640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 490702824, + "step": 3885 + }, + { + "epoch": 0.9968575642916694, + "grad_norm": 46.31318664550781, + "learning_rate": 5e-06, + "loss": 0.9558, + "num_input_tokens_seen": 490828512, + "step": 3886 + }, + { + "epoch": 0.9968575642916694, + "loss": 0.9666476249694824, + "loss_ce": 0.0005831157905049622, + "loss_iou": 0.44140625, + "loss_num": 0.0166015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 490828512, + "step": 3886 + }, + { + "epoch": 0.9971140896556147, + "grad_norm": 32.84713363647461, + "learning_rate": 5e-06, + "loss": 0.9419, + "num_input_tokens_seen": 490954252, + "step": 3887 + }, + { + "epoch": 0.9971140896556147, + "loss": 0.7826848030090332, + "loss_ce": 0.0009465296170674264, + "loss_iou": 0.369140625, + "loss_num": 0.0087890625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 490954252, + "step": 3887 + }, + { + "epoch": 0.99737061501956, + "grad_norm": 45.72207260131836, + "learning_rate": 5e-06, + "loss": 0.7937, + "num_input_tokens_seen": 491080412, + "step": 3888 + }, + { + "epoch": 0.99737061501956, + "loss": 0.8723834753036499, + "loss_ce": 0.0008014659397304058, + "loss_iou": 0.390625, + "loss_num": 0.018310546875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 491080412, + "step": 3888 + }, + { + "epoch": 0.9976271403835054, + "grad_norm": 44.55059051513672, + "learning_rate": 5e-06, + "loss": 0.9252, + "num_input_tokens_seen": 491206072, + "step": 3889 + }, + { + "epoch": 0.9976271403835054, + "loss": 0.9309748411178589, + "loss_ce": 0.0027522428426891565, + "loss_iou": 0.421875, + "loss_num": 0.016845703125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 491206072, + "step": 3889 + }, + { + "epoch": 0.9978836657474508, + "grad_norm": 42.68889617919922, + "learning_rate": 5e-06, + "loss": 1.0642, + "num_input_tokens_seen": 491332132, + "step": 3890 + }, + { + "epoch": 0.9978836657474508, + "loss": 0.9763637781143188, + "loss_ce": 0.0017544415313750505, + "loss_iou": 0.427734375, + "loss_num": 0.023681640625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 491332132, + "step": 3890 + }, + { + "epoch": 0.9981401911113962, + "grad_norm": 38.299068450927734, + "learning_rate": 5e-06, + "loss": 0.9518, + "num_input_tokens_seen": 491459088, + "step": 3891 + }, + { + "epoch": 0.9981401911113962, + "loss": 0.8780500292778015, + "loss_ce": 0.0008527666795998812, + "loss_iou": 0.416015625, + "loss_num": 0.0093994140625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 491459088, + "step": 3891 + }, + { + "epoch": 0.9983967164753415, + "grad_norm": 208.49398803710938, + "learning_rate": 5e-06, + "loss": 0.8149, + "num_input_tokens_seen": 491585608, + "step": 3892 + }, + { + "epoch": 0.9983967164753415, + "loss": 0.8733581304550171, + "loss_ce": 0.000555394624825567, + "loss_iou": 0.40234375, + "loss_num": 0.0133056640625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 491585608, + "step": 3892 + }, + { + "epoch": 0.9986532418392868, + "grad_norm": 39.03553009033203, + "learning_rate": 5e-06, + "loss": 0.9255, + "num_input_tokens_seen": 491712236, + "step": 3893 + }, + { + "epoch": 0.9986532418392868, + "loss": 0.9320447444915771, + "loss_ce": 0.00040410031215287745, + "loss_iou": 0.443359375, + "loss_num": 0.0087890625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 491712236, + "step": 3893 + }, + { + "epoch": 0.9989097672032322, + "grad_norm": 76.69193267822266, + "learning_rate": 5e-06, + "loss": 0.9782, + "num_input_tokens_seen": 491838776, + "step": 3894 + }, + { + "epoch": 0.9989097672032322, + "loss": 1.1748669147491455, + "loss_ce": 0.0029917897190898657, + "loss_iou": 0.53515625, + "loss_num": 0.021240234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 491838776, + "step": 3894 + }, + { + "epoch": 0.9991662925671776, + "grad_norm": 39.76282501220703, + "learning_rate": 5e-06, + "loss": 0.9103, + "num_input_tokens_seen": 491964592, + "step": 3895 + }, + { + "epoch": 0.9991662925671776, + "loss": 0.8319258093833923, + "loss_ce": 0.0003828371118288487, + "loss_iou": 0.37109375, + "loss_num": 0.018310546875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 491964592, + "step": 3895 + }, + { + "epoch": 0.999422817931123, + "grad_norm": 36.749244689941406, + "learning_rate": 5e-06, + "loss": 0.8567, + "num_input_tokens_seen": 492092648, + "step": 3896 + }, + { + "epoch": 0.999422817931123, + "loss": 0.7760977745056152, + "loss_ce": 0.00046306155854836106, + "loss_iou": 0.37109375, + "loss_num": 0.006561279296875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 492092648, + "step": 3896 + }, + { + "epoch": 0.9996793432950682, + "grad_norm": 50.07822036743164, + "learning_rate": 5e-06, + "loss": 0.8941, + "num_input_tokens_seen": 492219588, + "step": 3897 + }, + { + "epoch": 0.9996793432950682, + "loss": 0.9176594018936157, + "loss_ce": 0.00359692657366395, + "loss_iou": 0.423828125, + "loss_num": 0.01348876953125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 492219588, + "step": 3897 + }, + { + "epoch": 0.9999358686590136, + "grad_norm": 49.3172607421875, + "learning_rate": 5e-06, + "loss": 0.9149, + "num_input_tokens_seen": 492346184, + "step": 3898 + }, + { + "epoch": 0.9999358686590136, + "loss": 0.9387490749359131, + "loss_ce": 0.00027248638798482716, + "loss_iou": 0.435546875, + "loss_num": 0.0135498046875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 492346184, + "step": 3898 + }, + { + "epoch": 0.9999358686590136, + "loss": 0.8766347765922546, + "loss_ce": 0.0030996587593108416, + "loss_iou": 0.4140625, + "loss_num": 0.00933837890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 492378900, + "step": 3898 + }, + { + "epoch": 1.0001923940229591, + "grad_norm": 52.1972770690918, + "learning_rate": 5e-06, + "loss": 0.8409, + "num_input_tokens_seen": 492473476, + "step": 3899 + }, + { + "epoch": 1.0001923940229591, + "loss": 0.7951098680496216, + "loss_ce": 0.004094292409718037, + "loss_iou": 0.36328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 492473476, + "step": 3899 + }, + { + "epoch": 1.0004489193869044, + "grad_norm": 50.52654266357422, + "learning_rate": 5e-06, + "loss": 0.8705, + "num_input_tokens_seen": 492599744, + "step": 3900 + }, + { + "epoch": 1.0004489193869044, + "loss": 0.7347558736801147, + "loss_ce": 0.00013673826470039785, + "loss_iou": 0.353515625, + "loss_num": 0.00531005859375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 492599744, + "step": 3900 + }, + { + "epoch": 1.0007054447508497, + "grad_norm": 48.16531753540039, + "learning_rate": 5e-06, + "loss": 0.8985, + "num_input_tokens_seen": 492725856, + "step": 3901 + }, + { + "epoch": 1.0007054447508497, + "loss": 0.8072628378868103, + "loss_ce": 0.00037806006730534136, + "loss_iou": 0.37890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 492725856, + "step": 3901 + }, + { + "epoch": 1.0009619701147952, + "grad_norm": 40.16544723510742, + "learning_rate": 5e-06, + "loss": 0.9302, + "num_input_tokens_seen": 492853028, + "step": 3902 + }, + { + "epoch": 1.0009619701147952, + "loss": 0.8528118133544922, + "loss_ce": 0.0022258800454437733, + "loss_iou": 0.396484375, + "loss_num": 0.0113525390625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 492853028, + "step": 3902 + }, + { + "epoch": 1.0012184954787404, + "grad_norm": 35.07966232299805, + "learning_rate": 5e-06, + "loss": 0.7991, + "num_input_tokens_seen": 492979456, + "step": 3903 + }, + { + "epoch": 1.0012184954787404, + "loss": 0.7557319402694702, + "loss_ce": 0.0003608195693232119, + "loss_iou": 0.34765625, + "loss_num": 0.01171875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 492979456, + "step": 3903 + }, + { + "epoch": 1.001475020842686, + "grad_norm": 36.282470703125, + "learning_rate": 5e-06, + "loss": 0.8603, + "num_input_tokens_seen": 493105808, + "step": 3904 + }, + { + "epoch": 1.001475020842686, + "loss": 0.8001112341880798, + "loss_ce": 0.00030654820147901773, + "loss_iou": 0.384765625, + "loss_num": 0.0059814453125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 493105808, + "step": 3904 + }, + { + "epoch": 1.0017315462066312, + "grad_norm": 39.7251091003418, + "learning_rate": 5e-06, + "loss": 0.9141, + "num_input_tokens_seen": 493231852, + "step": 3905 + }, + { + "epoch": 1.0017315462066312, + "loss": 0.9437161684036255, + "loss_ce": 0.0003567825770005584, + "loss_iou": 0.421875, + "loss_num": 0.0201416015625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 493231852, + "step": 3905 + }, + { + "epoch": 1.0019880715705765, + "grad_norm": 32.84325408935547, + "learning_rate": 5e-06, + "loss": 0.8097, + "num_input_tokens_seen": 493357980, + "step": 3906 + }, + { + "epoch": 1.0019880715705765, + "loss": 0.8153329491615295, + "loss_ce": 0.00014742722851224244, + "loss_iou": 0.375, + "loss_num": 0.012939453125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 493357980, + "step": 3906 + }, + { + "epoch": 1.002244596934522, + "grad_norm": 90.8695297241211, + "learning_rate": 5e-06, + "loss": 0.9538, + "num_input_tokens_seen": 493484500, + "step": 3907 + }, + { + "epoch": 1.002244596934522, + "loss": 0.9550089836120605, + "loss_ce": 0.0009074235567823052, + "loss_iou": 0.4296875, + "loss_num": 0.01904296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 493484500, + "step": 3907 + }, + { + "epoch": 1.0025011222984672, + "grad_norm": 45.713768005371094, + "learning_rate": 5e-06, + "loss": 0.8525, + "num_input_tokens_seen": 493610888, + "step": 3908 + }, + { + "epoch": 1.0025011222984672, + "loss": 0.9276581406593323, + "loss_ce": 0.0009003114537335932, + "loss_iou": 0.43359375, + "loss_num": 0.0118408203125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 493610888, + "step": 3908 + }, + { + "epoch": 1.0027576476624127, + "grad_norm": 47.5828971862793, + "learning_rate": 5e-06, + "loss": 0.9352, + "num_input_tokens_seen": 493736884, + "step": 3909 + }, + { + "epoch": 1.0027576476624127, + "loss": 0.8822228908538818, + "loss_ce": 0.0018518210854381323, + "loss_iou": 0.412109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 493736884, + "step": 3909 + }, + { + "epoch": 1.003014173026358, + "grad_norm": 47.254783630371094, + "learning_rate": 5e-06, + "loss": 0.9239, + "num_input_tokens_seen": 493864328, + "step": 3910 + }, + { + "epoch": 1.003014173026358, + "loss": 0.9041429758071899, + "loss_ce": 0.00033433677162975073, + "loss_iou": 0.42578125, + "loss_num": 0.01043701171875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 493864328, + "step": 3910 + }, + { + "epoch": 1.0032706983903033, + "grad_norm": 47.286190032958984, + "learning_rate": 5e-06, + "loss": 0.871, + "num_input_tokens_seen": 493988448, + "step": 3911 + }, + { + "epoch": 1.0032706983903033, + "loss": 0.978190004825592, + "loss_ce": 0.00016268975741695613, + "loss_iou": 0.451171875, + "loss_num": 0.0150146484375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 493988448, + "step": 3911 + }, + { + "epoch": 1.0035272237542487, + "grad_norm": 51.643131256103516, + "learning_rate": 5e-06, + "loss": 0.8899, + "num_input_tokens_seen": 494113828, + "step": 3912 + }, + { + "epoch": 1.0035272237542487, + "loss": 0.9787276387214661, + "loss_ce": 0.001676888670772314, + "loss_iou": 0.453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 494113828, + "step": 3912 + }, + { + "epoch": 1.003783749118194, + "grad_norm": 29.802900314331055, + "learning_rate": 5e-06, + "loss": 0.9291, + "num_input_tokens_seen": 494239028, + "step": 3913 + }, + { + "epoch": 1.003783749118194, + "loss": 0.9184927940368652, + "loss_ce": 0.005895160138607025, + "loss_iou": 0.41015625, + "loss_num": 0.01806640625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 494239028, + "step": 3913 + }, + { + "epoch": 1.0040402744821395, + "grad_norm": 30.81559181213379, + "learning_rate": 5e-06, + "loss": 0.7138, + "num_input_tokens_seen": 494365252, + "step": 3914 + }, + { + "epoch": 1.0040402744821395, + "loss": 0.8059249520301819, + "loss_ce": 0.0002608997456263751, + "loss_iou": 0.390625, + "loss_num": 0.00518798828125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 494365252, + "step": 3914 + }, + { + "epoch": 1.0042967998460848, + "grad_norm": 18.271799087524414, + "learning_rate": 5e-06, + "loss": 0.8452, + "num_input_tokens_seen": 494490672, + "step": 3915 + }, + { + "epoch": 1.0042967998460848, + "loss": 0.7617598176002502, + "loss_ce": 0.002116272458806634, + "loss_iou": 0.35546875, + "loss_num": 0.00958251953125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 494490672, + "step": 3915 + }, + { + "epoch": 1.00455332521003, + "grad_norm": 25.7414493560791, + "learning_rate": 5e-06, + "loss": 0.8823, + "num_input_tokens_seen": 494616836, + "step": 3916 + }, + { + "epoch": 1.00455332521003, + "loss": 0.8948987722396851, + "loss_ce": 0.0001234516385011375, + "loss_iou": 0.4140625, + "loss_num": 0.01348876953125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 494616836, + "step": 3916 + }, + { + "epoch": 1.0048098505739755, + "grad_norm": 42.370235443115234, + "learning_rate": 5e-06, + "loss": 0.8076, + "num_input_tokens_seen": 494743760, + "step": 3917 + }, + { + "epoch": 1.0048098505739755, + "loss": 0.7927297353744507, + "loss_ce": 0.000249220960540697, + "loss_iou": 0.375, + "loss_num": 0.00885009765625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 494743760, + "step": 3917 + }, + { + "epoch": 1.0050663759379208, + "grad_norm": 46.720481872558594, + "learning_rate": 5e-06, + "loss": 0.9328, + "num_input_tokens_seen": 494870696, + "step": 3918 + }, + { + "epoch": 1.0050663759379208, + "loss": 0.8038618564605713, + "loss_ce": 0.00039507405017502606, + "loss_iou": 0.380859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 494870696, + "step": 3918 + }, + { + "epoch": 1.0053229013018663, + "grad_norm": 43.13616943359375, + "learning_rate": 5e-06, + "loss": 1.1243, + "num_input_tokens_seen": 494995560, + "step": 3919 + }, + { + "epoch": 1.0053229013018663, + "loss": 1.311650276184082, + "loss_ce": 0.00207989732734859, + "loss_iou": 0.546875, + "loss_num": 0.043212890625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 494995560, + "step": 3919 + }, + { + "epoch": 1.0055794266658116, + "grad_norm": 36.680213928222656, + "learning_rate": 5e-06, + "loss": 0.8465, + "num_input_tokens_seen": 495121292, + "step": 3920 + }, + { + "epoch": 1.0055794266658116, + "loss": 0.8631412982940674, + "loss_ce": 0.0003483020991552621, + "loss_iou": 0.404296875, + "loss_num": 0.0107421875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 495121292, + "step": 3920 + }, + { + "epoch": 1.0058359520297568, + "grad_norm": 35.73379898071289, + "learning_rate": 5e-06, + "loss": 0.8473, + "num_input_tokens_seen": 495247500, + "step": 3921 + }, + { + "epoch": 1.0058359520297568, + "loss": 0.8202420473098755, + "loss_ce": 0.0013943969970569015, + "loss_iou": 0.384765625, + "loss_num": 0.00958251953125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 495247500, + "step": 3921 + }, + { + "epoch": 1.0060924773937023, + "grad_norm": 29.253055572509766, + "learning_rate": 5e-06, + "loss": 1.0722, + "num_input_tokens_seen": 495374156, + "step": 3922 + }, + { + "epoch": 1.0060924773937023, + "loss": 1.1550666093826294, + "loss_ce": 0.0017462980467826128, + "loss_iou": 0.51171875, + "loss_num": 0.0252685546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 495374156, + "step": 3922 + }, + { + "epoch": 1.0063490027576476, + "grad_norm": 24.371746063232422, + "learning_rate": 5e-06, + "loss": 0.8495, + "num_input_tokens_seen": 495499156, + "step": 3923 + }, + { + "epoch": 1.0063490027576476, + "loss": 0.9380936622619629, + "loss_ce": 0.0013260412961244583, + "loss_iou": 0.421875, + "loss_num": 0.0189208984375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 495499156, + "step": 3923 + }, + { + "epoch": 1.006605528121593, + "grad_norm": 31.46317481994629, + "learning_rate": 5e-06, + "loss": 0.85, + "num_input_tokens_seen": 495624448, + "step": 3924 + }, + { + "epoch": 1.006605528121593, + "loss": 0.9315320253372192, + "loss_ce": 0.002332765841856599, + "loss_iou": 0.431640625, + "loss_num": 0.0130615234375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 495624448, + "step": 3924 + }, + { + "epoch": 1.0068620534855384, + "grad_norm": 26.452194213867188, + "learning_rate": 5e-06, + "loss": 0.8722, + "num_input_tokens_seen": 495750724, + "step": 3925 + }, + { + "epoch": 1.0068620534855384, + "loss": 0.6987533569335938, + "loss_ce": 0.0002670356188900769, + "loss_iou": 0.333984375, + "loss_num": 0.006195068359375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 495750724, + "step": 3925 + }, + { + "epoch": 1.0071185788494836, + "grad_norm": 42.56757736206055, + "learning_rate": 5e-06, + "loss": 0.8457, + "num_input_tokens_seen": 495876576, + "step": 3926 + }, + { + "epoch": 1.0071185788494836, + "loss": 0.76036536693573, + "loss_ce": 0.0005997934495098889, + "loss_iou": 0.357421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 495876576, + "step": 3926 + }, + { + "epoch": 1.0073751042134291, + "grad_norm": 48.13119125366211, + "learning_rate": 5e-06, + "loss": 0.8241, + "num_input_tokens_seen": 496002468, + "step": 3927 + }, + { + "epoch": 1.0073751042134291, + "loss": 0.9118061065673828, + "loss_ce": 0.00018498365534469485, + "loss_iou": 0.42578125, + "loss_num": 0.0120849609375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 496002468, + "step": 3927 + }, + { + "epoch": 1.0076316295773744, + "grad_norm": 49.43670654296875, + "learning_rate": 5e-06, + "loss": 0.8234, + "num_input_tokens_seen": 496129340, + "step": 3928 + }, + { + "epoch": 1.0076316295773744, + "loss": 0.6923571228981018, + "loss_ce": 0.0004625777364708483, + "loss_iou": 0.322265625, + "loss_num": 0.009765625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 496129340, + "step": 3928 + }, + { + "epoch": 1.00788815494132, + "grad_norm": 57.90275573730469, + "learning_rate": 5e-06, + "loss": 0.8065, + "num_input_tokens_seen": 496255892, + "step": 3929 + }, + { + "epoch": 1.00788815494132, + "loss": 0.9412513971328735, + "loss_ce": 0.0032630842179059982, + "loss_iou": 0.427734375, + "loss_num": 0.0167236328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 496255892, + "step": 3929 + }, + { + "epoch": 1.0081446803052652, + "grad_norm": 50.52181625366211, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 496382920, + "step": 3930 + }, + { + "epoch": 1.0081446803052652, + "loss": 1.0320907831192017, + "loss_ce": 0.00035254110116511583, + "loss_iou": 0.474609375, + "loss_num": 0.0166015625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 496382920, + "step": 3930 + }, + { + "epoch": 1.0084012056692107, + "grad_norm": 40.95301055908203, + "learning_rate": 5e-06, + "loss": 0.9189, + "num_input_tokens_seen": 496510480, + "step": 3931 + }, + { + "epoch": 1.0084012056692107, + "loss": 1.1019829511642456, + "loss_ce": 0.003350107464939356, + "loss_iou": 0.494140625, + "loss_num": 0.021728515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 496510480, + "step": 3931 + }, + { + "epoch": 1.008657731033156, + "grad_norm": 37.564937591552734, + "learning_rate": 5e-06, + "loss": 0.8916, + "num_input_tokens_seen": 496636320, + "step": 3932 + }, + { + "epoch": 1.008657731033156, + "loss": 0.9465269446372986, + "loss_ce": 0.0007261448190547526, + "loss_iou": 0.42578125, + "loss_num": 0.0185546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 496636320, + "step": 3932 + }, + { + "epoch": 1.0089142563971012, + "grad_norm": 36.3419303894043, + "learning_rate": 5e-06, + "loss": 0.923, + "num_input_tokens_seen": 496762904, + "step": 3933 + }, + { + "epoch": 1.0089142563971012, + "loss": 1.074247121810913, + "loss_ce": 0.001981595065444708, + "loss_iou": 0.482421875, + "loss_num": 0.021240234375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 496762904, + "step": 3933 + }, + { + "epoch": 1.0091707817610467, + "grad_norm": 30.732385635375977, + "learning_rate": 5e-06, + "loss": 0.8116, + "num_input_tokens_seen": 496889944, + "step": 3934 + }, + { + "epoch": 1.0091707817610467, + "loss": 0.7070404887199402, + "loss_ce": 0.0007416388834826648, + "loss_iou": 0.33203125, + "loss_num": 0.00811767578125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 496889944, + "step": 3934 + }, + { + "epoch": 1.009427307124992, + "grad_norm": 31.32607078552246, + "learning_rate": 5e-06, + "loss": 0.8471, + "num_input_tokens_seen": 497017580, + "step": 3935 + }, + { + "epoch": 1.009427307124992, + "loss": 0.8910384178161621, + "loss_ce": 0.0009017073316499591, + "loss_iou": 0.41796875, + "loss_num": 0.01055908203125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 497017580, + "step": 3935 + }, + { + "epoch": 1.0096838324889374, + "grad_norm": 39.129268646240234, + "learning_rate": 5e-06, + "loss": 0.8609, + "num_input_tokens_seen": 497142380, + "step": 3936 + }, + { + "epoch": 1.0096838324889374, + "loss": 0.7458942532539368, + "loss_ce": 0.00028878121520392597, + "loss_iou": 0.341796875, + "loss_num": 0.0126953125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 497142380, + "step": 3936 + }, + { + "epoch": 1.0099403578528827, + "grad_norm": 39.07707595825195, + "learning_rate": 5e-06, + "loss": 0.8597, + "num_input_tokens_seen": 497268244, + "step": 3937 + }, + { + "epoch": 1.0099403578528827, + "loss": 1.001796007156372, + "loss_ce": 0.0032607996836304665, + "loss_iou": 0.4609375, + "loss_num": 0.0157470703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 497268244, + "step": 3937 + }, + { + "epoch": 1.010196883216828, + "grad_norm": 44.83042907714844, + "learning_rate": 5e-06, + "loss": 0.8781, + "num_input_tokens_seen": 497394788, + "step": 3938 + }, + { + "epoch": 1.010196883216828, + "loss": 0.8726329803466797, + "loss_ce": 0.000562642642762512, + "loss_iou": 0.404296875, + "loss_num": 0.0125732421875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 497394788, + "step": 3938 + }, + { + "epoch": 1.0104534085807735, + "grad_norm": 53.024410247802734, + "learning_rate": 5e-06, + "loss": 0.8369, + "num_input_tokens_seen": 497521328, + "step": 3939 + }, + { + "epoch": 1.0104534085807735, + "loss": 0.7666683197021484, + "loss_ce": 0.0012874531093984842, + "loss_iou": 0.3515625, + "loss_num": 0.01251220703125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 497521328, + "step": 3939 + }, + { + "epoch": 1.0107099339447188, + "grad_norm": 41.30411148071289, + "learning_rate": 5e-06, + "loss": 1.0136, + "num_input_tokens_seen": 497647480, + "step": 3940 + }, + { + "epoch": 1.0107099339447188, + "loss": 0.9739000201225281, + "loss_ce": 0.00026720319874584675, + "loss_iou": 0.44921875, + "loss_num": 0.01507568359375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 497647480, + "step": 3940 + }, + { + "epoch": 1.0109664593086642, + "grad_norm": 32.45027160644531, + "learning_rate": 5e-06, + "loss": 0.8706, + "num_input_tokens_seen": 497774592, + "step": 3941 + }, + { + "epoch": 1.0109664593086642, + "loss": 0.7725317478179932, + "loss_ce": 0.0005590869695879519, + "loss_iou": 0.361328125, + "loss_num": 0.0101318359375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 497774592, + "step": 3941 + }, + { + "epoch": 1.0112229846726095, + "grad_norm": 26.58704376220703, + "learning_rate": 5e-06, + "loss": 0.8708, + "num_input_tokens_seen": 497900848, + "step": 3942 + }, + { + "epoch": 1.0112229846726095, + "loss": 1.003952980041504, + "loss_ce": 0.001511571230366826, + "loss_iou": 0.470703125, + "loss_num": 0.0118408203125, + "loss_xval": 1.0, + "num_input_tokens_seen": 497900848, + "step": 3942 + }, + { + "epoch": 1.0114795100365548, + "grad_norm": 13.316507339477539, + "learning_rate": 5e-06, + "loss": 0.9349, + "num_input_tokens_seen": 498025704, + "step": 3943 + }, + { + "epoch": 1.0114795100365548, + "loss": 0.8489960432052612, + "loss_ce": 0.0003632181033026427, + "loss_iou": 0.396484375, + "loss_num": 0.010986328125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 498025704, + "step": 3943 + }, + { + "epoch": 1.0117360354005003, + "grad_norm": 20.75684928894043, + "learning_rate": 5e-06, + "loss": 0.8132, + "num_input_tokens_seen": 498152604, + "step": 3944 + }, + { + "epoch": 1.0117360354005003, + "loss": 0.8162523508071899, + "loss_ce": 0.00033437402453273535, + "loss_iou": 0.3828125, + "loss_num": 0.0101318359375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 498152604, + "step": 3944 + }, + { + "epoch": 1.0119925607644455, + "grad_norm": 26.90835189819336, + "learning_rate": 5e-06, + "loss": 0.9493, + "num_input_tokens_seen": 498278296, + "step": 3945 + }, + { + "epoch": 1.0119925607644455, + "loss": 0.8673291802406311, + "loss_ce": 0.0006300057866610587, + "loss_iou": 0.3984375, + "loss_num": 0.0135498046875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 498278296, + "step": 3945 + }, + { + "epoch": 1.012249086128391, + "grad_norm": 44.762996673583984, + "learning_rate": 5e-06, + "loss": 0.9027, + "num_input_tokens_seen": 498403232, + "step": 3946 + }, + { + "epoch": 1.012249086128391, + "loss": 0.7795457243919373, + "loss_ce": 0.0031785385217517614, + "loss_iou": 0.361328125, + "loss_num": 0.0107421875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 498403232, + "step": 3946 + }, + { + "epoch": 1.0125056114923363, + "grad_norm": 44.58448028564453, + "learning_rate": 5e-06, + "loss": 1.0598, + "num_input_tokens_seen": 498529604, + "step": 3947 + }, + { + "epoch": 1.0125056114923363, + "loss": 1.047034502029419, + "loss_ce": 0.002112600952386856, + "loss_iou": 0.482421875, + "loss_num": 0.0157470703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 498529604, + "step": 3947 + }, + { + "epoch": 1.0127621368562816, + "grad_norm": 26.825424194335938, + "learning_rate": 5e-06, + "loss": 0.9707, + "num_input_tokens_seen": 498656352, + "step": 3948 + }, + { + "epoch": 1.0127621368562816, + "loss": 0.8654496073722839, + "loss_ce": 0.0007035282324068248, + "loss_iou": 0.3984375, + "loss_num": 0.01348876953125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 498656352, + "step": 3948 + }, + { + "epoch": 1.013018662220227, + "grad_norm": 35.59690856933594, + "learning_rate": 5e-06, + "loss": 0.9415, + "num_input_tokens_seen": 498783376, + "step": 3949 + }, + { + "epoch": 1.013018662220227, + "loss": 0.7971255779266357, + "loss_ce": 0.002203668002039194, + "loss_iou": 0.36328125, + "loss_num": 0.01318359375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 498783376, + "step": 3949 + }, + { + "epoch": 1.0132751875841723, + "grad_norm": 50.45384216308594, + "learning_rate": 5e-06, + "loss": 0.8701, + "num_input_tokens_seen": 498909356, + "step": 3950 + }, + { + "epoch": 1.0132751875841723, + "loss": 0.8379033803939819, + "loss_ce": 0.003430750919505954, + "loss_iou": 0.390625, + "loss_num": 0.0111083984375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 498909356, + "step": 3950 + }, + { + "epoch": 1.0135317129481178, + "grad_norm": 46.73894500732422, + "learning_rate": 5e-06, + "loss": 0.8818, + "num_input_tokens_seen": 499035956, + "step": 3951 + }, + { + "epoch": 1.0135317129481178, + "loss": 0.9373998641967773, + "loss_ce": 0.003562025958672166, + "loss_iou": 0.4375, + "loss_num": 0.01165771484375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 499035956, + "step": 3951 + }, + { + "epoch": 1.013788238312063, + "grad_norm": 59.11717224121094, + "learning_rate": 5e-06, + "loss": 0.9643, + "num_input_tokens_seen": 499162044, + "step": 3952 + }, + { + "epoch": 1.013788238312063, + "loss": 1.129201889038086, + "loss_ce": 0.00029557652305811644, + "loss_iou": 0.50390625, + "loss_num": 0.02490234375, + "loss_xval": 1.125, + "num_input_tokens_seen": 499162044, + "step": 3952 + }, + { + "epoch": 1.0140447636760084, + "grad_norm": 53.595821380615234, + "learning_rate": 5e-06, + "loss": 1.0545, + "num_input_tokens_seen": 499289192, + "step": 3953 + }, + { + "epoch": 1.0140447636760084, + "loss": 1.0467408895492554, + "loss_ce": 0.0008424907573498785, + "loss_iou": 0.48046875, + "loss_num": 0.01708984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 499289192, + "step": 3953 + }, + { + "epoch": 1.0143012890399539, + "grad_norm": 28.779396057128906, + "learning_rate": 5e-06, + "loss": 1.0036, + "num_input_tokens_seen": 499414972, + "step": 3954 + }, + { + "epoch": 1.0143012890399539, + "loss": 0.9531633853912354, + "loss_ce": 0.002968083368614316, + "loss_iou": 0.4296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 499414972, + "step": 3954 + }, + { + "epoch": 1.0145578144038991, + "grad_norm": 28.72728729248047, + "learning_rate": 5e-06, + "loss": 0.8755, + "num_input_tokens_seen": 499541784, + "step": 3955 + }, + { + "epoch": 1.0145578144038991, + "loss": 0.7312982082366943, + "loss_ce": 0.0013177114306017756, + "loss_iou": 0.337890625, + "loss_num": 0.01104736328125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 499541784, + "step": 3955 + }, + { + "epoch": 1.0148143397678446, + "grad_norm": 44.01615905761719, + "learning_rate": 5e-06, + "loss": 0.8875, + "num_input_tokens_seen": 499668716, + "step": 3956 + }, + { + "epoch": 1.0148143397678446, + "loss": 0.8213711977005005, + "loss_ce": 0.002035248326137662, + "loss_iou": 0.3828125, + "loss_num": 0.0107421875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 499668716, + "step": 3956 + }, + { + "epoch": 1.01507086513179, + "grad_norm": 64.2318344116211, + "learning_rate": 5e-06, + "loss": 0.9606, + "num_input_tokens_seen": 499795352, + "step": 3957 + }, + { + "epoch": 1.01507086513179, + "loss": 0.9073905944824219, + "loss_ce": 0.00040818777051754296, + "loss_iou": 0.41015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 499795352, + "step": 3957 + }, + { + "epoch": 1.0153273904957352, + "grad_norm": 48.10387420654297, + "learning_rate": 5e-06, + "loss": 0.8471, + "num_input_tokens_seen": 499921192, + "step": 3958 + }, + { + "epoch": 1.0153273904957352, + "loss": 0.849977970123291, + "loss_ce": 0.00012447647168301046, + "loss_iou": 0.41015625, + "loss_num": 0.0057373046875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 499921192, + "step": 3958 + }, + { + "epoch": 1.0155839158596807, + "grad_norm": 30.44474220275879, + "learning_rate": 5e-06, + "loss": 0.9518, + "num_input_tokens_seen": 500047748, + "step": 3959 + }, + { + "epoch": 1.0155839158596807, + "loss": 0.7763447165489197, + "loss_ce": 0.002174774883314967, + "loss_iou": 0.357421875, + "loss_num": 0.0120849609375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 500047748, + "step": 3959 + }, + { + "epoch": 1.015840441223626, + "grad_norm": 33.560089111328125, + "learning_rate": 5e-06, + "loss": 0.7711, + "num_input_tokens_seen": 500174052, + "step": 3960 + }, + { + "epoch": 1.015840441223626, + "loss": 0.8162609934806824, + "loss_ce": 0.001319607370533049, + "loss_iou": 0.388671875, + "loss_num": 0.00750732421875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 500174052, + "step": 3960 + }, + { + "epoch": 1.0160969665875714, + "grad_norm": 34.52674102783203, + "learning_rate": 5e-06, + "loss": 0.8428, + "num_input_tokens_seen": 500301188, + "step": 3961 + }, + { + "epoch": 1.0160969665875714, + "loss": 0.7004778385162354, + "loss_ce": 0.000526661635376513, + "loss_iou": 0.3359375, + "loss_num": 0.005615234375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 500301188, + "step": 3961 + }, + { + "epoch": 1.0163534919515167, + "grad_norm": 49.233726501464844, + "learning_rate": 5e-06, + "loss": 0.8819, + "num_input_tokens_seen": 500427748, + "step": 3962 + }, + { + "epoch": 1.0163534919515167, + "loss": 0.7876882553100586, + "loss_ce": 0.0010671776253730059, + "loss_iou": 0.3671875, + "loss_num": 0.0107421875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 500427748, + "step": 3962 + }, + { + "epoch": 1.016610017315462, + "grad_norm": 59.38427734375, + "learning_rate": 5e-06, + "loss": 0.9217, + "num_input_tokens_seen": 500553264, + "step": 3963 + }, + { + "epoch": 1.016610017315462, + "loss": 0.80934739112854, + "loss_ce": 0.0002653077244758606, + "loss_iou": 0.38671875, + "loss_num": 0.007110595703125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 500553264, + "step": 3963 + }, + { + "epoch": 1.0168665426794075, + "grad_norm": 85.82457733154297, + "learning_rate": 5e-06, + "loss": 0.9401, + "num_input_tokens_seen": 500679300, + "step": 3964 + }, + { + "epoch": 1.0168665426794075, + "loss": 0.7470293045043945, + "loss_ce": 0.0014238629955798388, + "loss_iou": 0.345703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 500679300, + "step": 3964 + }, + { + "epoch": 1.0171230680433527, + "grad_norm": 36.33940124511719, + "learning_rate": 5e-06, + "loss": 0.8153, + "num_input_tokens_seen": 500806164, + "step": 3965 + }, + { + "epoch": 1.0171230680433527, + "loss": 0.9504098892211914, + "loss_ce": 0.00021460256539285183, + "loss_iou": 0.44921875, + "loss_num": 0.0107421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 500806164, + "step": 3965 + }, + { + "epoch": 1.0173795934072982, + "grad_norm": 50.12022018432617, + "learning_rate": 5e-06, + "loss": 0.8028, + "num_input_tokens_seen": 500934344, + "step": 3966 + }, + { + "epoch": 1.0173795934072982, + "loss": 0.8113610148429871, + "loss_ce": 8.17443142295815e-05, + "loss_iou": 0.384765625, + "loss_num": 0.00872802734375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 500934344, + "step": 3966 + }, + { + "epoch": 1.0176361187712435, + "grad_norm": 69.47972106933594, + "learning_rate": 5e-06, + "loss": 0.865, + "num_input_tokens_seen": 501062100, + "step": 3967 + }, + { + "epoch": 1.0176361187712435, + "loss": 0.7863461971282959, + "loss_ce": 0.00021332697360776365, + "loss_iou": 0.37109375, + "loss_num": 0.0084228515625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 501062100, + "step": 3967 + }, + { + "epoch": 1.0178926441351888, + "grad_norm": 48.74889373779297, + "learning_rate": 5e-06, + "loss": 0.9913, + "num_input_tokens_seen": 501189164, + "step": 3968 + }, + { + "epoch": 1.0178926441351888, + "loss": 0.8979724645614624, + "loss_ce": 0.0019764485768973827, + "loss_iou": 0.421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 501189164, + "step": 3968 + }, + { + "epoch": 1.0181491694991343, + "grad_norm": 28.98969268798828, + "learning_rate": 5e-06, + "loss": 0.8207, + "num_input_tokens_seen": 501315256, + "step": 3969 + }, + { + "epoch": 1.0181491694991343, + "loss": 0.9210138916969299, + "loss_ce": 0.0025568734854459763, + "loss_iou": 0.416015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 501315256, + "step": 3969 + }, + { + "epoch": 1.0184056948630795, + "grad_norm": 39.483909606933594, + "learning_rate": 5e-06, + "loss": 0.8868, + "num_input_tokens_seen": 501441856, + "step": 3970 + }, + { + "epoch": 1.0184056948630795, + "loss": 0.7234241962432861, + "loss_ce": 0.0007679525879211724, + "loss_iou": 0.34375, + "loss_num": 0.0072021484375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 501441856, + "step": 3970 + }, + { + "epoch": 1.018662220227025, + "grad_norm": 59.87135696411133, + "learning_rate": 5e-06, + "loss": 0.8441, + "num_input_tokens_seen": 501569224, + "step": 3971 + }, + { + "epoch": 1.018662220227025, + "loss": 0.9232269525527954, + "loss_ce": 0.00037537614116445184, + "loss_iou": 0.42578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 501569224, + "step": 3971 + }, + { + "epoch": 1.0189187455909703, + "grad_norm": 47.71225357055664, + "learning_rate": 5e-06, + "loss": 0.8857, + "num_input_tokens_seen": 501696224, + "step": 3972 + }, + { + "epoch": 1.0189187455909703, + "loss": 0.813605546951294, + "loss_ce": 0.0011055127251893282, + "loss_iou": 0.37109375, + "loss_num": 0.01385498046875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 501696224, + "step": 3972 + }, + { + "epoch": 1.0191752709549156, + "grad_norm": 43.74977111816406, + "learning_rate": 5e-06, + "loss": 0.8344, + "num_input_tokens_seen": 501823212, + "step": 3973 + }, + { + "epoch": 1.0191752709549156, + "loss": 0.9106391072273254, + "loss_ce": 0.0007269838824868202, + "loss_iou": 0.41796875, + "loss_num": 0.01507568359375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 501823212, + "step": 3973 + }, + { + "epoch": 1.019431796318861, + "grad_norm": 37.20829391479492, + "learning_rate": 5e-06, + "loss": 0.9255, + "num_input_tokens_seen": 501949376, + "step": 3974 + }, + { + "epoch": 1.019431796318861, + "loss": 1.1467788219451904, + "loss_ce": 0.003712439676746726, + "loss_iou": 0.51171875, + "loss_num": 0.0244140625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 501949376, + "step": 3974 + }, + { + "epoch": 1.0196883216828063, + "grad_norm": 29.063762664794922, + "learning_rate": 5e-06, + "loss": 0.8083, + "num_input_tokens_seen": 502075488, + "step": 3975 + }, + { + "epoch": 1.0196883216828063, + "loss": 0.8571223020553589, + "loss_ce": 0.0006769968895241618, + "loss_iou": 0.388671875, + "loss_num": 0.01611328125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 502075488, + "step": 3975 + }, + { + "epoch": 1.0199448470467518, + "grad_norm": 33.12002944946289, + "learning_rate": 5e-06, + "loss": 0.8345, + "num_input_tokens_seen": 502201912, + "step": 3976 + }, + { + "epoch": 1.0199448470467518, + "loss": 0.8574131727218628, + "loss_ce": 0.0036533833481371403, + "loss_iou": 0.388671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 502201912, + "step": 3976 + }, + { + "epoch": 1.020201372410697, + "grad_norm": 26.13715171813965, + "learning_rate": 5e-06, + "loss": 0.8785, + "num_input_tokens_seen": 502328340, + "step": 3977 + }, + { + "epoch": 1.020201372410697, + "loss": 0.9527499675750732, + "loss_ce": 0.0015781213296577334, + "loss_iou": 0.4375, + "loss_num": 0.01483154296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 502328340, + "step": 3977 + }, + { + "epoch": 1.0204578977746426, + "grad_norm": 42.302223205566406, + "learning_rate": 5e-06, + "loss": 0.8736, + "num_input_tokens_seen": 502455052, + "step": 3978 + }, + { + "epoch": 1.0204578977746426, + "loss": 0.7269701957702637, + "loss_ce": 0.00040766061283648014, + "loss_iou": 0.33984375, + "loss_num": 0.00946044921875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 502455052, + "step": 3978 + }, + { + "epoch": 1.0207144231385878, + "grad_norm": 38.20693588256836, + "learning_rate": 5e-06, + "loss": 0.8772, + "num_input_tokens_seen": 502582048, + "step": 3979 + }, + { + "epoch": 1.0207144231385878, + "loss": 0.8737525939941406, + "loss_ce": 0.0024147070944309235, + "loss_iou": 0.396484375, + "loss_num": 0.0157470703125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 502582048, + "step": 3979 + }, + { + "epoch": 1.0209709485025331, + "grad_norm": 38.8513069152832, + "learning_rate": 5e-06, + "loss": 0.809, + "num_input_tokens_seen": 502707676, + "step": 3980 + }, + { + "epoch": 1.0209709485025331, + "loss": 0.7334942817687988, + "loss_ce": 0.00033998285653069615, + "loss_iou": 0.349609375, + "loss_num": 0.006927490234375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 502707676, + "step": 3980 + }, + { + "epoch": 1.0212274738664786, + "grad_norm": 47.11493682861328, + "learning_rate": 5e-06, + "loss": 0.8135, + "num_input_tokens_seen": 502834064, + "step": 3981 + }, + { + "epoch": 1.0212274738664786, + "loss": 0.8948256373405457, + "loss_ce": 0.00029437366174533963, + "loss_iou": 0.4140625, + "loss_num": 0.01361083984375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 502834064, + "step": 3981 + }, + { + "epoch": 1.0214839992304239, + "grad_norm": 37.914485931396484, + "learning_rate": 5e-06, + "loss": 1.0054, + "num_input_tokens_seen": 502959544, + "step": 3982 + }, + { + "epoch": 1.0214839992304239, + "loss": 0.9962164163589478, + "loss_ce": 0.0010992008028551936, + "loss_iou": 0.4609375, + "loss_num": 0.01483154296875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 502959544, + "step": 3982 + }, + { + "epoch": 1.0217405245943694, + "grad_norm": 29.836000442504883, + "learning_rate": 5e-06, + "loss": 0.7611, + "num_input_tokens_seen": 503084628, + "step": 3983 + }, + { + "epoch": 1.0217405245943694, + "loss": 0.812404453754425, + "loss_ce": 0.0001485995016992092, + "loss_iou": 0.38671875, + "loss_num": 0.007232666015625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 503084628, + "step": 3983 + }, + { + "epoch": 1.0219970499583146, + "grad_norm": 31.154399871826172, + "learning_rate": 5e-06, + "loss": 0.8518, + "num_input_tokens_seen": 503209476, + "step": 3984 + }, + { + "epoch": 1.0219970499583146, + "loss": 1.035015344619751, + "loss_ce": 0.0018122168257832527, + "loss_iou": 0.46875, + "loss_num": 0.01904296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 503209476, + "step": 3984 + }, + { + "epoch": 1.02225357532226, + "grad_norm": 24.668697357177734, + "learning_rate": 5e-06, + "loss": 0.9179, + "num_input_tokens_seen": 503336860, + "step": 3985 + }, + { + "epoch": 1.02225357532226, + "loss": 0.9954509735107422, + "loss_ce": 0.00033376453211531043, + "loss_iou": 0.46484375, + "loss_num": 0.01300048828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 503336860, + "step": 3985 + }, + { + "epoch": 1.0225101006862054, + "grad_norm": 19.751623153686523, + "learning_rate": 5e-06, + "loss": 1.0243, + "num_input_tokens_seen": 503462660, + "step": 3986 + }, + { + "epoch": 1.0225101006862054, + "loss": 1.1702947616577148, + "loss_ce": 0.0013495611492544413, + "loss_iou": 0.5234375, + "loss_num": 0.02392578125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 503462660, + "step": 3986 + }, + { + "epoch": 1.0227666260501507, + "grad_norm": 25.5441837310791, + "learning_rate": 5e-06, + "loss": 0.8762, + "num_input_tokens_seen": 503587816, + "step": 3987 + }, + { + "epoch": 1.0227666260501507, + "loss": 0.7900753021240234, + "loss_ce": 0.0007686770986765623, + "loss_iou": 0.369140625, + "loss_num": 0.010009765625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 503587816, + "step": 3987 + }, + { + "epoch": 1.0230231514140962, + "grad_norm": 33.589996337890625, + "learning_rate": 5e-06, + "loss": 0.8415, + "num_input_tokens_seen": 503714992, + "step": 3988 + }, + { + "epoch": 1.0230231514140962, + "loss": 0.7839004993438721, + "loss_ce": 0.0006973581039346755, + "loss_iou": 0.361328125, + "loss_num": 0.012451171875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 503714992, + "step": 3988 + }, + { + "epoch": 1.0232796767780414, + "grad_norm": 47.386512756347656, + "learning_rate": 5e-06, + "loss": 0.8944, + "num_input_tokens_seen": 503840908, + "step": 3989 + }, + { + "epoch": 1.0232796767780414, + "loss": 0.9626175165176392, + "loss_ce": 0.00021520820155274123, + "loss_iou": 0.44140625, + "loss_num": 0.015380859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 503840908, + "step": 3989 + }, + { + "epoch": 1.0235362021419867, + "grad_norm": 39.97118377685547, + "learning_rate": 5e-06, + "loss": 0.8851, + "num_input_tokens_seen": 503966920, + "step": 3990 + }, + { + "epoch": 1.0235362021419867, + "loss": 0.9736287593841553, + "loss_ce": 0.0019490504637360573, + "loss_iou": 0.4609375, + "loss_num": 0.0096435546875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 503966920, + "step": 3990 + }, + { + "epoch": 1.0237927275059322, + "grad_norm": 29.255727767944336, + "learning_rate": 5e-06, + "loss": 0.8736, + "num_input_tokens_seen": 504093116, + "step": 3991 + }, + { + "epoch": 1.0237927275059322, + "loss": 0.7151129245758057, + "loss_ce": 0.0010016151936724782, + "loss_iou": 0.34375, + "loss_num": 0.005279541015625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 504093116, + "step": 3991 + }, + { + "epoch": 1.0240492528698775, + "grad_norm": 36.89223861694336, + "learning_rate": 5e-06, + "loss": 0.8599, + "num_input_tokens_seen": 504219052, + "step": 3992 + }, + { + "epoch": 1.0240492528698775, + "loss": 0.842548668384552, + "loss_ce": 0.0007517996127717197, + "loss_iou": 0.3984375, + "loss_num": 0.00909423828125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 504219052, + "step": 3992 + }, + { + "epoch": 1.024305778233823, + "grad_norm": 41.68785095214844, + "learning_rate": 5e-06, + "loss": 0.8191, + "num_input_tokens_seen": 504344092, + "step": 3993 + }, + { + "epoch": 1.024305778233823, + "loss": 0.7479589581489563, + "loss_ce": 0.0008886796422302723, + "loss_iou": 0.357421875, + "loss_num": 0.00677490234375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 504344092, + "step": 3993 + }, + { + "epoch": 1.0245623035977682, + "grad_norm": 52.6393928527832, + "learning_rate": 5e-06, + "loss": 0.9053, + "num_input_tokens_seen": 504469652, + "step": 3994 + }, + { + "epoch": 1.0245623035977682, + "loss": 0.8952017426490784, + "loss_ce": 0.0040884558111429214, + "loss_iou": 0.4140625, + "loss_num": 0.012451171875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 504469652, + "step": 3994 + }, + { + "epoch": 1.0248188289617135, + "grad_norm": 53.295955657958984, + "learning_rate": 5e-06, + "loss": 0.9033, + "num_input_tokens_seen": 504595736, + "step": 3995 + }, + { + "epoch": 1.0248188289617135, + "loss": 0.8795344829559326, + "loss_ce": 0.00013994230539537966, + "loss_iou": 0.41796875, + "loss_num": 0.00927734375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 504595736, + "step": 3995 + }, + { + "epoch": 1.025075354325659, + "grad_norm": 51.17523193359375, + "learning_rate": 5e-06, + "loss": 0.8232, + "num_input_tokens_seen": 504721076, + "step": 3996 + }, + { + "epoch": 1.025075354325659, + "loss": 0.6793662309646606, + "loss_ce": 0.00041117900400422513, + "loss_iou": 0.326171875, + "loss_num": 0.005706787109375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 504721076, + "step": 3996 + }, + { + "epoch": 1.0253318796896043, + "grad_norm": 44.53055953979492, + "learning_rate": 5e-06, + "loss": 0.7955, + "num_input_tokens_seen": 504847168, + "step": 3997 + }, + { + "epoch": 1.0253318796896043, + "loss": 0.8204234838485718, + "loss_ce": 0.00011101095878984779, + "loss_iou": 0.39453125, + "loss_num": 0.006591796875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 504847168, + "step": 3997 + }, + { + "epoch": 1.0255884050535498, + "grad_norm": 55.485469818115234, + "learning_rate": 5e-06, + "loss": 0.9825, + "num_input_tokens_seen": 504974064, + "step": 3998 + }, + { + "epoch": 1.0255884050535498, + "loss": 1.02176833152771, + "loss_ce": 0.0022370684891939163, + "loss_iou": 0.482421875, + "loss_num": 0.010986328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 504974064, + "step": 3998 + }, + { + "epoch": 1.025844930417495, + "grad_norm": 49.297359466552734, + "learning_rate": 5e-06, + "loss": 0.843, + "num_input_tokens_seen": 505100968, + "step": 3999 + }, + { + "epoch": 1.025844930417495, + "loss": 0.9438464641571045, + "loss_ce": 0.00585815217345953, + "loss_iou": 0.421875, + "loss_num": 0.0186767578125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 505100968, + "step": 3999 + }, + { + "epoch": 1.0261014557814403, + "grad_norm": 51.123443603515625, + "learning_rate": 5e-06, + "loss": 0.8516, + "num_input_tokens_seen": 505227696, + "step": 4000 + }, + { + "epoch": 1.0261014557814403, + "eval_icons_CIoU": 0.2629767432808876, + "eval_icons_GIoU": 0.2294548898935318, + "eval_icons_IoU": 0.4418274313211441, + "eval_icons_MAE_all": 0.02577248029410839, + "eval_icons_MAE_h": 0.03044621180742979, + "eval_icons_MAE_w": 0.05449048802256584, + "eval_icons_MAE_x_boxes": 0.05605706572532654, + "eval_icons_MAE_y_boxes": 0.031257313676178455, + "eval_icons_NUM_probability": 0.9997778534889221, + "eval_icons_inside_bbox": 0.6684027910232544, + "eval_icons_loss": 1.6291700601577759, + "eval_icons_loss_ce": 8.988614899863023e-05, + "eval_icons_loss_iou": 0.7574462890625, + "eval_icons_loss_num": 0.02904510498046875, + "eval_icons_loss_xval": 1.658935546875, + "eval_icons_runtime": 56.9556, + "eval_icons_samples_per_second": 0.878, + "eval_icons_steps_per_second": 0.035, + "num_input_tokens_seen": 505227696, + "step": 4000 + }, + { + "epoch": 1.0261014557814403, + "eval_screenspot_CIoU": 0.1299015680948893, + "eval_screenspot_GIoU": 0.1171463131904602, + "eval_screenspot_IoU": 0.2965235710144043, + "eval_screenspot_MAE_all": 0.07560681675871213, + "eval_screenspot_MAE_h": 0.0648095856110255, + "eval_screenspot_MAE_w": 0.1222541332244873, + "eval_screenspot_MAE_x_boxes": 0.1042500560482343, + "eval_screenspot_MAE_y_boxes": 0.05802440643310547, + "eval_screenspot_NUM_probability": 0.9999476273854574, + "eval_screenspot_inside_bbox": 0.5808333357175192, + "eval_screenspot_loss": 2.1835989952087402, + "eval_screenspot_loss_ce": 0.002204801654443145, + "eval_screenspot_loss_iou": 0.90771484375, + "eval_screenspot_loss_num": 0.07987721761067708, + "eval_screenspot_loss_xval": 2.2151692708333335, + "eval_screenspot_runtime": 94.4057, + "eval_screenspot_samples_per_second": 0.943, + "eval_screenspot_steps_per_second": 0.032, + "num_input_tokens_seen": 505227696, + "step": 4000 + }, + { + "epoch": 1.0261014557814403, + "loss": 2.1956138610839844, + "loss_ce": 0.0012780085671693087, + "loss_iou": 0.9140625, + "loss_num": 0.0732421875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 505227696, + "step": 4000 + }, + { + "epoch": 1.0263579811453858, + "grad_norm": 47.42884826660156, + "learning_rate": 5e-06, + "loss": 0.8716, + "num_input_tokens_seen": 505353260, + "step": 4001 + }, + { + "epoch": 1.0263579811453858, + "loss": 0.8671429753303528, + "loss_ce": 0.003373438026756048, + "loss_iou": 0.396484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 505353260, + "step": 4001 + }, + { + "epoch": 1.026614506509331, + "grad_norm": 59.812591552734375, + "learning_rate": 5e-06, + "loss": 0.9393, + "num_input_tokens_seen": 505479816, + "step": 4002 + }, + { + "epoch": 1.026614506509331, + "loss": 0.8337695598602295, + "loss_ce": 0.00027350563323125243, + "loss_iou": 0.3984375, + "loss_num": 0.0074462890625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 505479816, + "step": 4002 + }, + { + "epoch": 1.0268710318732766, + "grad_norm": 59.36829376220703, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 505606572, + "step": 4003 + }, + { + "epoch": 1.0268710318732766, + "loss": 0.854920506477356, + "loss_ce": 0.0014048840384930372, + "loss_iou": 0.408203125, + "loss_num": 0.00762939453125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 505606572, + "step": 4003 + }, + { + "epoch": 1.0271275572372218, + "grad_norm": 46.584815979003906, + "learning_rate": 5e-06, + "loss": 0.9396, + "num_input_tokens_seen": 505733128, + "step": 4004 + }, + { + "epoch": 1.0271275572372218, + "loss": 0.8529113531112671, + "loss_ce": 0.0006164070800878108, + "loss_iou": 0.38671875, + "loss_num": 0.0159912109375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 505733128, + "step": 4004 + }, + { + "epoch": 1.027384082601167, + "grad_norm": 45.365318298339844, + "learning_rate": 5e-06, + "loss": 0.9811, + "num_input_tokens_seen": 505859824, + "step": 4005 + }, + { + "epoch": 1.027384082601167, + "loss": 1.001922845840454, + "loss_ce": 0.0014346300158649683, + "loss_iou": 0.478515625, + "loss_num": 0.00860595703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 505859824, + "step": 4005 + }, + { + "epoch": 1.0276406079651126, + "grad_norm": 60.09733581542969, + "learning_rate": 5e-06, + "loss": 0.9692, + "num_input_tokens_seen": 505987332, + "step": 4006 + }, + { + "epoch": 1.0276406079651126, + "loss": 1.1657438278198242, + "loss_ce": 0.0011930913897231221, + "loss_iou": 0.54296875, + "loss_num": 0.0162353515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 505987332, + "step": 4006 + }, + { + "epoch": 1.0278971333290579, + "grad_norm": 45.711727142333984, + "learning_rate": 5e-06, + "loss": 0.9187, + "num_input_tokens_seen": 506112948, + "step": 4007 + }, + { + "epoch": 1.0278971333290579, + "loss": 0.918578028678894, + "loss_ce": 0.00012098745355615392, + "loss_iou": 0.419921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 506112948, + "step": 4007 + }, + { + "epoch": 1.0281536586930033, + "grad_norm": 27.9440975189209, + "learning_rate": 5e-06, + "loss": 0.8746, + "num_input_tokens_seen": 506238744, + "step": 4008 + }, + { + "epoch": 1.0281536586930033, + "loss": 0.7322365641593933, + "loss_ce": 0.0007912334986031055, + "loss_iou": 0.34765625, + "loss_num": 0.00750732421875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 506238744, + "step": 4008 + }, + { + "epoch": 1.0284101840569486, + "grad_norm": 20.25840950012207, + "learning_rate": 5e-06, + "loss": 0.8519, + "num_input_tokens_seen": 506364736, + "step": 4009 + }, + { + "epoch": 1.0284101840569486, + "loss": 0.6571990847587585, + "loss_ce": 0.00021670080604963005, + "loss_iou": 0.30859375, + "loss_num": 0.00811767578125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 506364736, + "step": 4009 + }, + { + "epoch": 1.0286667094208939, + "grad_norm": 23.128658294677734, + "learning_rate": 5e-06, + "loss": 0.9018, + "num_input_tokens_seen": 506491016, + "step": 4010 + }, + { + "epoch": 1.0286667094208939, + "loss": 1.0391865968704224, + "loss_ce": 0.0025654607452452183, + "loss_iou": 0.45703125, + "loss_num": 0.024169921875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 506491016, + "step": 4010 + }, + { + "epoch": 1.0289232347848394, + "grad_norm": 27.171491622924805, + "learning_rate": 5e-06, + "loss": 0.8615, + "num_input_tokens_seen": 506618236, + "step": 4011 + }, + { + "epoch": 1.0289232347848394, + "loss": 0.9537492990493774, + "loss_ce": 0.0035539937671273947, + "loss_iou": 0.42578125, + "loss_num": 0.019775390625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 506618236, + "step": 4011 + }, + { + "epoch": 1.0291797601487847, + "grad_norm": 20.324810028076172, + "learning_rate": 5e-06, + "loss": 0.9514, + "num_input_tokens_seen": 506744016, + "step": 4012 + }, + { + "epoch": 1.0291797601487847, + "loss": 0.8650120496749878, + "loss_ce": 0.00026595970848575234, + "loss_iou": 0.396484375, + "loss_num": 0.01409912109375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 506744016, + "step": 4012 + }, + { + "epoch": 1.0294362855127301, + "grad_norm": 16.68267822265625, + "learning_rate": 5e-06, + "loss": 0.8792, + "num_input_tokens_seen": 506870552, + "step": 4013 + }, + { + "epoch": 1.0294362855127301, + "loss": 1.138114094734192, + "loss_ce": 0.0004187042941339314, + "loss_iou": 0.51171875, + "loss_num": 0.0225830078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 506870552, + "step": 4013 + }, + { + "epoch": 1.0296928108766754, + "grad_norm": 13.695229530334473, + "learning_rate": 5e-06, + "loss": 0.846, + "num_input_tokens_seen": 506996548, + "step": 4014 + }, + { + "epoch": 1.0296928108766754, + "loss": 0.8118672370910645, + "loss_ce": 9.967176447389647e-05, + "loss_iou": 0.38671875, + "loss_num": 0.00799560546875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 506996548, + "step": 4014 + }, + { + "epoch": 1.0299493362406207, + "grad_norm": 797.0865478515625, + "learning_rate": 5e-06, + "loss": 0.8367, + "num_input_tokens_seen": 507123064, + "step": 4015 + }, + { + "epoch": 1.0299493362406207, + "loss": 0.9423940181732178, + "loss_ce": 0.002452583983540535, + "loss_iou": 0.43359375, + "loss_num": 0.0146484375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 507123064, + "step": 4015 + }, + { + "epoch": 1.0302058616045662, + "grad_norm": 161.01829528808594, + "learning_rate": 5e-06, + "loss": 0.8494, + "num_input_tokens_seen": 507248972, + "step": 4016 + }, + { + "epoch": 1.0302058616045662, + "loss": 0.8710408210754395, + "loss_ce": 0.00019126357801724225, + "loss_iou": 0.4140625, + "loss_num": 0.00830078125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 507248972, + "step": 4016 + }, + { + "epoch": 1.0304623869685114, + "grad_norm": 33.35986328125, + "learning_rate": 5e-06, + "loss": 0.7036, + "num_input_tokens_seen": 507375344, + "step": 4017 + }, + { + "epoch": 1.0304623869685114, + "loss": 0.7417660355567932, + "loss_ce": 0.002019948326051235, + "loss_iou": 0.341796875, + "loss_num": 0.0113525390625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 507375344, + "step": 4017 + }, + { + "epoch": 1.030718912332457, + "grad_norm": 36.47433090209961, + "learning_rate": 5e-06, + "loss": 0.9544, + "num_input_tokens_seen": 507501808, + "step": 4018 + }, + { + "epoch": 1.030718912332457, + "loss": 0.8726283311843872, + "loss_ce": 0.004464235622435808, + "loss_iou": 0.396484375, + "loss_num": 0.0146484375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 507501808, + "step": 4018 + }, + { + "epoch": 1.0309754376964022, + "grad_norm": 53.0880126953125, + "learning_rate": 5e-06, + "loss": 0.8246, + "num_input_tokens_seen": 507627512, + "step": 4019 + }, + { + "epoch": 1.0309754376964022, + "loss": 0.8161571025848389, + "loss_ce": 0.00023915445490274578, + "loss_iou": 0.3828125, + "loss_num": 0.0103759765625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 507627512, + "step": 4019 + }, + { + "epoch": 1.0312319630603477, + "grad_norm": 46.42916488647461, + "learning_rate": 5e-06, + "loss": 0.7719, + "num_input_tokens_seen": 507753540, + "step": 4020 + }, + { + "epoch": 1.0312319630603477, + "loss": 0.8266443014144897, + "loss_ce": 0.000472433865070343, + "loss_iou": 0.38671875, + "loss_num": 0.01019287109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 507753540, + "step": 4020 + }, + { + "epoch": 1.031488488424293, + "grad_norm": 28.865375518798828, + "learning_rate": 5e-06, + "loss": 0.9821, + "num_input_tokens_seen": 507880728, + "step": 4021 + }, + { + "epoch": 1.031488488424293, + "loss": 1.1604408025741577, + "loss_ce": 0.002237738808616996, + "loss_iou": 0.52734375, + "loss_num": 0.021484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 507880728, + "step": 4021 + }, + { + "epoch": 1.0317450137882382, + "grad_norm": 98.63385772705078, + "learning_rate": 5e-06, + "loss": 0.8294, + "num_input_tokens_seen": 508006272, + "step": 4022 + }, + { + "epoch": 1.0317450137882382, + "loss": 0.7089860439300537, + "loss_ce": 0.0004899362102150917, + "loss_iou": 0.341796875, + "loss_num": 0.00506591796875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 508006272, + "step": 4022 + }, + { + "epoch": 1.0320015391521837, + "grad_norm": 39.560123443603516, + "learning_rate": 5e-06, + "loss": 0.8525, + "num_input_tokens_seen": 508132048, + "step": 4023 + }, + { + "epoch": 1.0320015391521837, + "loss": 0.8363714814186096, + "loss_ce": 0.0016546788392588496, + "loss_iou": 0.388671875, + "loss_num": 0.011474609375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 508132048, + "step": 4023 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 48.35432052612305, + "learning_rate": 5e-06, + "loss": 0.8298, + "num_input_tokens_seen": 508259392, + "step": 4024 + }, + { + "epoch": 1.032258064516129, + "loss": 0.8597798347473145, + "loss_ce": 0.001869632862508297, + "loss_iou": 0.40234375, + "loss_num": 0.01055908203125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 508259392, + "step": 4024 + }, + { + "epoch": 1.0325145898800745, + "grad_norm": 63.1019401550293, + "learning_rate": 5e-06, + "loss": 0.8458, + "num_input_tokens_seen": 508385524, + "step": 4025 + }, + { + "epoch": 1.0325145898800745, + "loss": 0.8330774903297424, + "loss_ce": 0.0003138239844702184, + "loss_iou": 0.384765625, + "loss_num": 0.0125732421875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 508385524, + "step": 4025 + }, + { + "epoch": 1.0327711152440198, + "grad_norm": 47.8669548034668, + "learning_rate": 5e-06, + "loss": 0.9118, + "num_input_tokens_seen": 508510628, + "step": 4026 + }, + { + "epoch": 1.0327711152440198, + "loss": 0.8845911622047424, + "loss_ce": 0.0027552014216780663, + "loss_iou": 0.408203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 508510628, + "step": 4026 + }, + { + "epoch": 1.033027640607965, + "grad_norm": 35.50339889526367, + "learning_rate": 5e-06, + "loss": 0.8826, + "num_input_tokens_seen": 508636596, + "step": 4027 + }, + { + "epoch": 1.033027640607965, + "loss": 1.0330781936645508, + "loss_ce": 0.00036323454696685076, + "loss_iou": 0.462890625, + "loss_num": 0.021484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 508636596, + "step": 4027 + }, + { + "epoch": 1.0332841659719105, + "grad_norm": 37.343360900878906, + "learning_rate": 5e-06, + "loss": 0.7692, + "num_input_tokens_seen": 508762688, + "step": 4028 + }, + { + "epoch": 1.0332841659719105, + "loss": 0.7749756574630737, + "loss_ce": 0.0010499266209080815, + "loss_iou": 0.359375, + "loss_num": 0.01068115234375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 508762688, + "step": 4028 + }, + { + "epoch": 1.0335406913358558, + "grad_norm": 56.10839080810547, + "learning_rate": 5e-06, + "loss": 0.8001, + "num_input_tokens_seen": 508889560, + "step": 4029 + }, + { + "epoch": 1.0335406913358558, + "loss": 0.7847346067428589, + "loss_ce": 0.0003107609518337995, + "loss_iou": 0.365234375, + "loss_num": 0.01092529296875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 508889560, + "step": 4029 + }, + { + "epoch": 1.0337972166998013, + "grad_norm": 43.472537994384766, + "learning_rate": 5e-06, + "loss": 0.8393, + "num_input_tokens_seen": 509014880, + "step": 4030 + }, + { + "epoch": 1.0337972166998013, + "loss": 0.805358350276947, + "loss_ce": 0.0006708444561809301, + "loss_iou": 0.37109375, + "loss_num": 0.0123291015625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 509014880, + "step": 4030 + }, + { + "epoch": 1.0340537420637466, + "grad_norm": 42.960784912109375, + "learning_rate": 5e-06, + "loss": 0.7782, + "num_input_tokens_seen": 509141444, + "step": 4031 + }, + { + "epoch": 1.0340537420637466, + "loss": 0.6642683744430542, + "loss_ce": 0.0002058270911220461, + "loss_iou": 0.318359375, + "loss_num": 0.005279541015625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 509141444, + "step": 4031 + }, + { + "epoch": 1.0343102674276918, + "grad_norm": 43.12409591674805, + "learning_rate": 5e-06, + "loss": 0.9014, + "num_input_tokens_seen": 509267308, + "step": 4032 + }, + { + "epoch": 1.0343102674276918, + "loss": 0.8103522658348083, + "loss_ce": 0.00029364589136093855, + "loss_iou": 0.384765625, + "loss_num": 0.0079345703125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 509267308, + "step": 4032 + }, + { + "epoch": 1.0345667927916373, + "grad_norm": 42.7735481262207, + "learning_rate": 5e-06, + "loss": 0.876, + "num_input_tokens_seen": 509393368, + "step": 4033 + }, + { + "epoch": 1.0345667927916373, + "loss": 0.882784366607666, + "loss_ce": 0.0033897929824888706, + "loss_iou": 0.408203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 509393368, + "step": 4033 + }, + { + "epoch": 1.0348233181555826, + "grad_norm": 29.572301864624023, + "learning_rate": 5e-06, + "loss": 1.0637, + "num_input_tokens_seen": 509518320, + "step": 4034 + }, + { + "epoch": 1.0348233181555826, + "loss": 0.8878448605537415, + "loss_ce": 0.00014956855739001185, + "loss_iou": 0.412109375, + "loss_num": 0.01220703125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 509518320, + "step": 4034 + }, + { + "epoch": 1.035079843519528, + "grad_norm": 36.41844940185547, + "learning_rate": 5e-06, + "loss": 0.7466, + "num_input_tokens_seen": 509644528, + "step": 4035 + }, + { + "epoch": 1.035079843519528, + "loss": 0.7647613883018494, + "loss_ce": 0.0032867693807929754, + "loss_iou": 0.359375, + "loss_num": 0.00830078125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 509644528, + "step": 4035 + }, + { + "epoch": 1.0353363688834734, + "grad_norm": 43.95167541503906, + "learning_rate": 5e-06, + "loss": 0.9582, + "num_input_tokens_seen": 509769956, + "step": 4036 + }, + { + "epoch": 1.0353363688834734, + "loss": 0.8984459042549133, + "loss_ce": 0.0002524935989640653, + "loss_iou": 0.416015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 509769956, + "step": 4036 + }, + { + "epoch": 1.0355928942474186, + "grad_norm": 23.650976181030273, + "learning_rate": 5e-06, + "loss": 0.8792, + "num_input_tokens_seen": 509896356, + "step": 4037 + }, + { + "epoch": 1.0355928942474186, + "loss": 0.874738335609436, + "loss_ce": 0.000714890833478421, + "loss_iou": 0.412109375, + "loss_num": 0.01031494140625, + "loss_xval": 0.875, + "num_input_tokens_seen": 509896356, + "step": 4037 + }, + { + "epoch": 1.0358494196113641, + "grad_norm": 24.00243377685547, + "learning_rate": 5e-06, + "loss": 0.8238, + "num_input_tokens_seen": 510022620, + "step": 4038 + }, + { + "epoch": 1.0358494196113641, + "loss": 0.8856875896453857, + "loss_ce": 0.0006777917733415961, + "loss_iou": 0.400390625, + "loss_num": 0.016357421875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 510022620, + "step": 4038 + }, + { + "epoch": 1.0361059449753094, + "grad_norm": 42.28544235229492, + "learning_rate": 5e-06, + "loss": 0.8302, + "num_input_tokens_seen": 510150220, + "step": 4039 + }, + { + "epoch": 1.0361059449753094, + "loss": 0.7426685094833374, + "loss_ce": 0.00023685320047661662, + "loss_iou": 0.349609375, + "loss_num": 0.00897216796875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 510150220, + "step": 4039 + }, + { + "epoch": 1.0363624703392549, + "grad_norm": 56.32689666748047, + "learning_rate": 5e-06, + "loss": 0.8899, + "num_input_tokens_seen": 510276700, + "step": 4040 + }, + { + "epoch": 1.0363624703392549, + "loss": 0.7801055908203125, + "loss_ce": 0.0027617907617241144, + "loss_iou": 0.3671875, + "loss_num": 0.00897216796875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 510276700, + "step": 4040 + }, + { + "epoch": 1.0366189957032002, + "grad_norm": 41.187015533447266, + "learning_rate": 5e-06, + "loss": 1.0213, + "num_input_tokens_seen": 510402816, + "step": 4041 + }, + { + "epoch": 1.0366189957032002, + "loss": 1.0540460348129272, + "loss_ce": 0.0008234119741246104, + "loss_iou": 0.47265625, + "loss_num": 0.021484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 510402816, + "step": 4041 + }, + { + "epoch": 1.0368755210671454, + "grad_norm": 35.79241180419922, + "learning_rate": 5e-06, + "loss": 0.9331, + "num_input_tokens_seen": 510529388, + "step": 4042 + }, + { + "epoch": 1.0368755210671454, + "loss": 0.7981353402137756, + "loss_ce": 0.0017486142460256815, + "loss_iou": 0.375, + "loss_num": 0.0087890625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 510529388, + "step": 4042 + }, + { + "epoch": 1.037132046431091, + "grad_norm": 43.548126220703125, + "learning_rate": 5e-06, + "loss": 0.8686, + "num_input_tokens_seen": 510656188, + "step": 4043 + }, + { + "epoch": 1.037132046431091, + "loss": 0.8347257971763611, + "loss_ce": 0.00220626313239336, + "loss_iou": 0.376953125, + "loss_num": 0.01611328125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 510656188, + "step": 4043 + }, + { + "epoch": 1.0373885717950362, + "grad_norm": 44.28942108154297, + "learning_rate": 5e-06, + "loss": 0.8296, + "num_input_tokens_seen": 510782796, + "step": 4044 + }, + { + "epoch": 1.0373885717950362, + "loss": 0.8449469208717346, + "loss_ce": 0.000708654522895813, + "loss_iou": 0.380859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 510782796, + "step": 4044 + }, + { + "epoch": 1.0376450971589817, + "grad_norm": 31.26905632019043, + "learning_rate": 5e-06, + "loss": 0.8213, + "num_input_tokens_seen": 510909384, + "step": 4045 + }, + { + "epoch": 1.0376450971589817, + "loss": 0.7793227434158325, + "loss_ce": 0.0010024096118286252, + "loss_iou": 0.373046875, + "loss_num": 0.0062255859375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 510909384, + "step": 4045 + }, + { + "epoch": 1.037901622522927, + "grad_norm": 34.49673080444336, + "learning_rate": 5e-06, + "loss": 0.7927, + "num_input_tokens_seen": 511034312, + "step": 4046 + }, + { + "epoch": 1.037901622522927, + "loss": 0.7747915983200073, + "loss_ce": 0.003063072683289647, + "loss_iou": 0.36328125, + "loss_num": 0.0089111328125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 511034312, + "step": 4046 + }, + { + "epoch": 1.0381581478868722, + "grad_norm": 67.95018005371094, + "learning_rate": 5e-06, + "loss": 0.8477, + "num_input_tokens_seen": 511160560, + "step": 4047 + }, + { + "epoch": 1.0381581478868722, + "loss": 0.7233680486679077, + "loss_ce": 0.0002235166321042925, + "loss_iou": 0.34765625, + "loss_num": 0.0054931640625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 511160560, + "step": 4047 + }, + { + "epoch": 1.0384146732508177, + "grad_norm": 29.56394386291504, + "learning_rate": 5e-06, + "loss": 0.8915, + "num_input_tokens_seen": 511285548, + "step": 4048 + }, + { + "epoch": 1.0384146732508177, + "loss": 0.8260068893432617, + "loss_ce": 0.0010557555360719562, + "loss_iou": 0.37890625, + "loss_num": 0.01336669921875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 511285548, + "step": 4048 + }, + { + "epoch": 1.038671198614763, + "grad_norm": 31.000885009765625, + "learning_rate": 5e-06, + "loss": 0.9211, + "num_input_tokens_seen": 511411948, + "step": 4049 + }, + { + "epoch": 1.038671198614763, + "loss": 0.9050424098968506, + "loss_ce": 0.0022103150840848684, + "loss_iou": 0.40234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 511411948, + "step": 4049 + }, + { + "epoch": 1.0389277239787085, + "grad_norm": 43.051029205322266, + "learning_rate": 5e-06, + "loss": 0.8572, + "num_input_tokens_seen": 511538172, + "step": 4050 + }, + { + "epoch": 1.0389277239787085, + "loss": 0.8784751892089844, + "loss_ce": 0.0020103405695408583, + "loss_iou": 0.41015625, + "loss_num": 0.011474609375, + "loss_xval": 0.875, + "num_input_tokens_seen": 511538172, + "step": 4050 + }, + { + "epoch": 1.0391842493426537, + "grad_norm": 46.405296325683594, + "learning_rate": 5e-06, + "loss": 0.8733, + "num_input_tokens_seen": 511664760, + "step": 4051 + }, + { + "epoch": 1.0391842493426537, + "loss": 1.006199598312378, + "loss_ce": 0.0018050993094220757, + "loss_iou": 0.470703125, + "loss_num": 0.0128173828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 511664760, + "step": 4051 + }, + { + "epoch": 1.039440774706599, + "grad_norm": 43.630126953125, + "learning_rate": 5e-06, + "loss": 0.8991, + "num_input_tokens_seen": 511789604, + "step": 4052 + }, + { + "epoch": 1.039440774706599, + "loss": 1.1900179386138916, + "loss_ce": 0.002029655035585165, + "loss_iou": 0.546875, + "loss_num": 0.0186767578125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 511789604, + "step": 4052 + }, + { + "epoch": 1.0396973000705445, + "grad_norm": 36.97641372680664, + "learning_rate": 5e-06, + "loss": 0.8273, + "num_input_tokens_seen": 511915748, + "step": 4053 + }, + { + "epoch": 1.0396973000705445, + "loss": 0.8166852593421936, + "loss_ce": 0.001743852742947638, + "loss_iou": 0.37890625, + "loss_num": 0.0115966796875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 511915748, + "step": 4053 + }, + { + "epoch": 1.0399538254344898, + "grad_norm": 60.402339935302734, + "learning_rate": 5e-06, + "loss": 1.0311, + "num_input_tokens_seen": 512041004, + "step": 4054 + }, + { + "epoch": 1.0399538254344898, + "loss": 0.689223051071167, + "loss_ce": 0.0002582204469945282, + "loss_iou": 0.33203125, + "loss_num": 0.004730224609375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 512041004, + "step": 4054 + }, + { + "epoch": 1.0402103507984353, + "grad_norm": 52.778472900390625, + "learning_rate": 5e-06, + "loss": 0.8645, + "num_input_tokens_seen": 512167072, + "step": 4055 + }, + { + "epoch": 1.0402103507984353, + "loss": 0.9841662645339966, + "loss_ce": 0.00027950218645855784, + "loss_iou": 0.46484375, + "loss_num": 0.01104736328125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 512167072, + "step": 4055 + }, + { + "epoch": 1.0404668761623805, + "grad_norm": 52.40740203857422, + "learning_rate": 5e-06, + "loss": 0.9026, + "num_input_tokens_seen": 512293648, + "step": 4056 + }, + { + "epoch": 1.0404668761623805, + "loss": 0.8972729444503784, + "loss_ce": 0.00030035615782253444, + "loss_iou": 0.42578125, + "loss_num": 0.00927734375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 512293648, + "step": 4056 + }, + { + "epoch": 1.0407234015263258, + "grad_norm": 57.483436584472656, + "learning_rate": 5e-06, + "loss": 0.8888, + "num_input_tokens_seen": 512419600, + "step": 4057 + }, + { + "epoch": 1.0407234015263258, + "loss": 0.8787315487861633, + "loss_ce": 0.005440541543066502, + "loss_iou": 0.400390625, + "loss_num": 0.0146484375, + "loss_xval": 0.875, + "num_input_tokens_seen": 512419600, + "step": 4057 + }, + { + "epoch": 1.0409799268902713, + "grad_norm": 52.76645278930664, + "learning_rate": 5e-06, + "loss": 0.8664, + "num_input_tokens_seen": 512544288, + "step": 4058 + }, + { + "epoch": 1.0409799268902713, + "loss": 0.9172601699829102, + "loss_ce": 0.0007562080281786621, + "loss_iou": 0.435546875, + "loss_num": 0.00946044921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 512544288, + "step": 4058 + }, + { + "epoch": 1.0412364522542166, + "grad_norm": 49.799190521240234, + "learning_rate": 5e-06, + "loss": 0.7592, + "num_input_tokens_seen": 512669632, + "step": 4059 + }, + { + "epoch": 1.0412364522542166, + "loss": 0.7349209189414978, + "loss_ce": 0.002987339859828353, + "loss_iou": 0.34375, + "loss_num": 0.0093994140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 512669632, + "step": 4059 + }, + { + "epoch": 1.041492977618162, + "grad_norm": 45.17097473144531, + "learning_rate": 5e-06, + "loss": 0.8719, + "num_input_tokens_seen": 512796204, + "step": 4060 + }, + { + "epoch": 1.041492977618162, + "loss": 0.9596514701843262, + "loss_ce": 0.0011553840013220906, + "loss_iou": 0.43359375, + "loss_num": 0.01806640625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 512796204, + "step": 4060 + }, + { + "epoch": 1.0417495029821073, + "grad_norm": 42.879302978515625, + "learning_rate": 5e-06, + "loss": 0.9296, + "num_input_tokens_seen": 512922436, + "step": 4061 + }, + { + "epoch": 1.0417495029821073, + "loss": 1.046180248260498, + "loss_ce": 0.0012584143551066518, + "loss_iou": 0.4765625, + "loss_num": 0.0184326171875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 512922436, + "step": 4061 + }, + { + "epoch": 1.0420060283460528, + "grad_norm": 43.15908432006836, + "learning_rate": 5e-06, + "loss": 0.8661, + "num_input_tokens_seen": 513049600, + "step": 4062 + }, + { + "epoch": 1.0420060283460528, + "loss": 0.8183607459068298, + "loss_ce": 0.0004896618775092065, + "loss_iou": 0.38671875, + "loss_num": 0.0091552734375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 513049600, + "step": 4062 + }, + { + "epoch": 1.042262553709998, + "grad_norm": 48.283973693847656, + "learning_rate": 5e-06, + "loss": 0.9297, + "num_input_tokens_seen": 513176004, + "step": 4063 + }, + { + "epoch": 1.042262553709998, + "loss": 0.9644708037376404, + "loss_ce": 0.0010918928310275078, + "loss_iou": 0.439453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 513176004, + "step": 4063 + }, + { + "epoch": 1.0425190790739434, + "grad_norm": 55.71781539916992, + "learning_rate": 5e-06, + "loss": 0.9902, + "num_input_tokens_seen": 513303180, + "step": 4064 + }, + { + "epoch": 1.0425190790739434, + "loss": 0.9372072815895081, + "loss_ce": 0.004590179305523634, + "loss_iou": 0.431640625, + "loss_num": 0.01422119140625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 513303180, + "step": 4064 + }, + { + "epoch": 1.0427756044378889, + "grad_norm": 42.641536712646484, + "learning_rate": 5e-06, + "loss": 0.89, + "num_input_tokens_seen": 513428720, + "step": 4065 + }, + { + "epoch": 1.0427756044378889, + "loss": 0.8219008445739746, + "loss_ce": 0.0008558672852814198, + "loss_iou": 0.37109375, + "loss_num": 0.01519775390625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 513428720, + "step": 4065 + }, + { + "epoch": 1.0430321298018341, + "grad_norm": 41.095375061035156, + "learning_rate": 5e-06, + "loss": 0.9483, + "num_input_tokens_seen": 513554316, + "step": 4066 + }, + { + "epoch": 1.0430321298018341, + "loss": 1.179195761680603, + "loss_ce": 0.0009730730671435595, + "loss_iou": 0.515625, + "loss_num": 0.029296875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 513554316, + "step": 4066 + }, + { + "epoch": 1.0432886551657796, + "grad_norm": 53.62884521484375, + "learning_rate": 5e-06, + "loss": 0.8748, + "num_input_tokens_seen": 513680212, + "step": 4067 + }, + { + "epoch": 1.0432886551657796, + "loss": 0.7174863219261169, + "loss_ce": 0.0002011688193306327, + "loss_iou": 0.341796875, + "loss_num": 0.006744384765625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 513680212, + "step": 4067 + }, + { + "epoch": 1.043545180529725, + "grad_norm": 55.788612365722656, + "learning_rate": 5e-06, + "loss": 0.8366, + "num_input_tokens_seen": 513808160, + "step": 4068 + }, + { + "epoch": 1.043545180529725, + "loss": 0.8604631423950195, + "loss_ce": 0.00011156160326208919, + "loss_iou": 0.408203125, + "loss_num": 0.0087890625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 513808160, + "step": 4068 + }, + { + "epoch": 1.0438017058936702, + "grad_norm": 47.28523635864258, + "learning_rate": 5e-06, + "loss": 0.9566, + "num_input_tokens_seen": 513934116, + "step": 4069 + }, + { + "epoch": 1.0438017058936702, + "loss": 0.9616454839706421, + "loss_ce": 0.0009521078900434077, + "loss_iou": 0.435546875, + "loss_num": 0.01806640625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 513934116, + "step": 4069 + }, + { + "epoch": 1.0440582312576157, + "grad_norm": 39.817325592041016, + "learning_rate": 5e-06, + "loss": 0.8756, + "num_input_tokens_seen": 514060464, + "step": 4070 + }, + { + "epoch": 1.0440582312576157, + "loss": 0.9921343922615051, + "loss_ce": 0.000923456100281328, + "loss_iou": 0.455078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 514060464, + "step": 4070 + }, + { + "epoch": 1.044314756621561, + "grad_norm": 31.744911193847656, + "learning_rate": 5e-06, + "loss": 0.8211, + "num_input_tokens_seen": 514185668, + "step": 4071 + }, + { + "epoch": 1.044314756621561, + "loss": 0.7388823628425598, + "loss_ce": 0.0006011135410517454, + "loss_iou": 0.353515625, + "loss_num": 0.006500244140625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 514185668, + "step": 4071 + }, + { + "epoch": 1.0445712819855064, + "grad_norm": 39.68789291381836, + "learning_rate": 5e-06, + "loss": 0.9328, + "num_input_tokens_seen": 514312756, + "step": 4072 + }, + { + "epoch": 1.0445712819855064, + "loss": 0.8843892216682434, + "loss_ce": 0.00011186256597284228, + "loss_iou": 0.3984375, + "loss_num": 0.01708984375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 514312756, + "step": 4072 + }, + { + "epoch": 1.0448278073494517, + "grad_norm": 64.71673583984375, + "learning_rate": 5e-06, + "loss": 0.834, + "num_input_tokens_seen": 514439504, + "step": 4073 + }, + { + "epoch": 1.0448278073494517, + "loss": 0.8184006214141846, + "loss_ce": 0.001506112632341683, + "loss_iou": 0.3828125, + "loss_num": 0.01055908203125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 514439504, + "step": 4073 + }, + { + "epoch": 1.045084332713397, + "grad_norm": 51.55939483642578, + "learning_rate": 5e-06, + "loss": 0.995, + "num_input_tokens_seen": 514566124, + "step": 4074 + }, + { + "epoch": 1.045084332713397, + "loss": 0.9192376136779785, + "loss_ce": 0.0002923352876678109, + "loss_iou": 0.4375, + "loss_num": 0.008544921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 514566124, + "step": 4074 + }, + { + "epoch": 1.0453408580773424, + "grad_norm": 35.02167892456055, + "learning_rate": 5e-06, + "loss": 0.8458, + "num_input_tokens_seen": 514690796, + "step": 4075 + }, + { + "epoch": 1.0453408580773424, + "loss": 0.865117073059082, + "loss_ce": 0.0013475829036906362, + "loss_iou": 0.404296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 514690796, + "step": 4075 + }, + { + "epoch": 1.0455973834412877, + "grad_norm": 43.27149963378906, + "learning_rate": 5e-06, + "loss": 0.8822, + "num_input_tokens_seen": 514817884, + "step": 4076 + }, + { + "epoch": 1.0455973834412877, + "loss": 1.042595624923706, + "loss_ce": 0.0015798765234649181, + "loss_iou": 0.484375, + "loss_num": 0.0140380859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 514817884, + "step": 4076 + }, + { + "epoch": 1.0458539088052332, + "grad_norm": 42.53464889526367, + "learning_rate": 5e-06, + "loss": 0.9119, + "num_input_tokens_seen": 514943032, + "step": 4077 + }, + { + "epoch": 1.0458539088052332, + "loss": 0.8637349009513855, + "loss_ce": 0.00045366413542069495, + "loss_iou": 0.416015625, + "loss_num": 0.006622314453125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 514943032, + "step": 4077 + }, + { + "epoch": 1.0461104341691785, + "grad_norm": 40.46619415283203, + "learning_rate": 5e-06, + "loss": 0.758, + "num_input_tokens_seen": 515069060, + "step": 4078 + }, + { + "epoch": 1.0461104341691785, + "loss": 0.6984306573867798, + "loss_ce": 0.0053154779598116875, + "loss_iou": 0.32421875, + "loss_num": 0.0089111328125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 515069060, + "step": 4078 + }, + { + "epoch": 1.0463669595331238, + "grad_norm": 55.581077575683594, + "learning_rate": 5e-06, + "loss": 1.039, + "num_input_tokens_seen": 515194120, + "step": 4079 + }, + { + "epoch": 1.0463669595331238, + "loss": 1.116402506828308, + "loss_ce": 0.002144690603017807, + "loss_iou": 0.49609375, + "loss_num": 0.0242919921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 515194120, + "step": 4079 + }, + { + "epoch": 1.0466234848970692, + "grad_norm": 42.603397369384766, + "learning_rate": 5e-06, + "loss": 0.9301, + "num_input_tokens_seen": 515320492, + "step": 4080 + }, + { + "epoch": 1.0466234848970692, + "loss": 1.075883150100708, + "loss_ce": 0.0023968503810465336, + "loss_iou": 0.478515625, + "loss_num": 0.023193359375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 515320492, + "step": 4080 + }, + { + "epoch": 1.0468800102610145, + "grad_norm": 45.47235107421875, + "learning_rate": 5e-06, + "loss": 0.9048, + "num_input_tokens_seen": 515445424, + "step": 4081 + }, + { + "epoch": 1.0468800102610145, + "loss": 0.9404513835906982, + "loss_ce": 0.00026581919519230723, + "loss_iou": 0.421875, + "loss_num": 0.0191650390625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 515445424, + "step": 4081 + }, + { + "epoch": 1.04713653562496, + "grad_norm": 57.9911994934082, + "learning_rate": 5e-06, + "loss": 0.9449, + "num_input_tokens_seen": 515572452, + "step": 4082 + }, + { + "epoch": 1.04713653562496, + "loss": 0.9812464714050293, + "loss_ce": 0.002730884589254856, + "loss_iou": 0.458984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 515572452, + "step": 4082 + }, + { + "epoch": 1.0473930609889053, + "grad_norm": 39.17112350463867, + "learning_rate": 5e-06, + "loss": 0.8402, + "num_input_tokens_seen": 515698208, + "step": 4083 + }, + { + "epoch": 1.0473930609889053, + "loss": 0.753851056098938, + "loss_ce": 0.0011655071284621954, + "loss_iou": 0.357421875, + "loss_num": 0.0079345703125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 515698208, + "step": 4083 + }, + { + "epoch": 1.0476495863528505, + "grad_norm": 33.82990264892578, + "learning_rate": 5e-06, + "loss": 0.8794, + "num_input_tokens_seen": 515824536, + "step": 4084 + }, + { + "epoch": 1.0476495863528505, + "loss": 0.7446677684783936, + "loss_ce": 0.0005271095433272421, + "loss_iou": 0.357421875, + "loss_num": 0.00616455078125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 515824536, + "step": 4084 + }, + { + "epoch": 1.047906111716796, + "grad_norm": 43.84975051879883, + "learning_rate": 5e-06, + "loss": 0.865, + "num_input_tokens_seen": 515951480, + "step": 4085 + }, + { + "epoch": 1.047906111716796, + "loss": 0.7855434417724609, + "loss_ce": 0.00014305440708994865, + "loss_iou": 0.373046875, + "loss_num": 0.007720947265625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 515951480, + "step": 4085 + }, + { + "epoch": 1.0481626370807413, + "grad_norm": 47.768028259277344, + "learning_rate": 5e-06, + "loss": 0.8695, + "num_input_tokens_seen": 516077944, + "step": 4086 + }, + { + "epoch": 1.0481626370807413, + "loss": 0.8065837621688843, + "loss_ce": 0.00043139158515259624, + "loss_iou": 0.384765625, + "loss_num": 0.007293701171875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 516077944, + "step": 4086 + }, + { + "epoch": 1.0484191624446868, + "grad_norm": 49.027626037597656, + "learning_rate": 5e-06, + "loss": 0.8166, + "num_input_tokens_seen": 516204644, + "step": 4087 + }, + { + "epoch": 1.0484191624446868, + "loss": 0.7573102712631226, + "loss_ce": 0.0009626103565096855, + "loss_iou": 0.359375, + "loss_num": 0.007415771484375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 516204644, + "step": 4087 + }, + { + "epoch": 1.048675687808632, + "grad_norm": 47.059906005859375, + "learning_rate": 5e-06, + "loss": 0.8327, + "num_input_tokens_seen": 516330372, + "step": 4088 + }, + { + "epoch": 1.048675687808632, + "loss": 0.7656165361404419, + "loss_ce": 0.00047976424684748054, + "loss_iou": 0.3671875, + "loss_num": 0.006378173828125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 516330372, + "step": 4088 + }, + { + "epoch": 1.0489322131725773, + "grad_norm": 53.77711486816406, + "learning_rate": 5e-06, + "loss": 0.879, + "num_input_tokens_seen": 516456360, + "step": 4089 + }, + { + "epoch": 1.0489322131725773, + "loss": 1.0247306823730469, + "loss_ce": 0.0012932176468893886, + "loss_iou": 0.486328125, + "loss_num": 0.0101318359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 516456360, + "step": 4089 + }, + { + "epoch": 1.0491887385365228, + "grad_norm": 57.244937896728516, + "learning_rate": 5e-06, + "loss": 0.9813, + "num_input_tokens_seen": 516583404, + "step": 4090 + }, + { + "epoch": 1.0491887385365228, + "loss": 0.9540413618087769, + "loss_ce": 0.00042805264820344746, + "loss_iou": 0.435546875, + "loss_num": 0.016357421875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 516583404, + "step": 4090 + }, + { + "epoch": 1.049445263900468, + "grad_norm": 63.96878433227539, + "learning_rate": 5e-06, + "loss": 0.9631, + "num_input_tokens_seen": 516710012, + "step": 4091 + }, + { + "epoch": 1.049445263900468, + "loss": 0.9928058981895447, + "loss_ce": 0.0011066882871091366, + "loss_iou": 0.4609375, + "loss_num": 0.0142822265625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 516710012, + "step": 4091 + }, + { + "epoch": 1.0497017892644136, + "grad_norm": 44.136260986328125, + "learning_rate": 5e-06, + "loss": 0.9907, + "num_input_tokens_seen": 516835620, + "step": 4092 + }, + { + "epoch": 1.0497017892644136, + "loss": 1.1798019409179688, + "loss_ce": 0.001090968493372202, + "loss_iou": 0.5234375, + "loss_num": 0.0267333984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 516835620, + "step": 4092 + }, + { + "epoch": 1.0499583146283589, + "grad_norm": 22.87763023376465, + "learning_rate": 5e-06, + "loss": 0.8687, + "num_input_tokens_seen": 516963284, + "step": 4093 + }, + { + "epoch": 1.0499583146283589, + "loss": 1.0090456008911133, + "loss_ce": 0.0007448159158229828, + "loss_iou": 0.453125, + "loss_num": 0.0205078125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 516963284, + "step": 4093 + }, + { + "epoch": 1.0502148399923041, + "grad_norm": 25.832195281982422, + "learning_rate": 5e-06, + "loss": 0.9388, + "num_input_tokens_seen": 517089464, + "step": 4094 + }, + { + "epoch": 1.0502148399923041, + "loss": 1.091756820678711, + "loss_ce": 0.0004482200602069497, + "loss_iou": 0.5, + "loss_num": 0.01806640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 517089464, + "step": 4094 + }, + { + "epoch": 1.0504713653562496, + "grad_norm": 41.06627655029297, + "learning_rate": 5e-06, + "loss": 0.767, + "num_input_tokens_seen": 517216616, + "step": 4095 + }, + { + "epoch": 1.0504713653562496, + "loss": 0.7618753910064697, + "loss_ce": 0.0001566667197039351, + "loss_iou": 0.359375, + "loss_num": 0.008544921875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 517216616, + "step": 4095 + }, + { + "epoch": 1.050727890720195, + "grad_norm": 48.485076904296875, + "learning_rate": 5e-06, + "loss": 0.7837, + "num_input_tokens_seen": 517343096, + "step": 4096 + }, + { + "epoch": 1.050727890720195, + "loss": 0.8505502939224243, + "loss_ce": 0.003382347524166107, + "loss_iou": 0.3984375, + "loss_num": 0.01019287109375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 517343096, + "step": 4096 + }, + { + "epoch": 1.0509844160841404, + "grad_norm": 48.37678909301758, + "learning_rate": 5e-06, + "loss": 0.8574, + "num_input_tokens_seen": 517469076, + "step": 4097 + }, + { + "epoch": 1.0509844160841404, + "loss": 0.9125613570213318, + "loss_ce": 0.00045193993719294667, + "loss_iou": 0.40234375, + "loss_num": 0.02099609375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 517469076, + "step": 4097 + }, + { + "epoch": 1.0512409414480857, + "grad_norm": 49.536075592041016, + "learning_rate": 5e-06, + "loss": 0.9463, + "num_input_tokens_seen": 517595808, + "step": 4098 + }, + { + "epoch": 1.0512409414480857, + "loss": 1.0381102561950684, + "loss_ce": 0.0010008021490648389, + "loss_iou": 0.478515625, + "loss_num": 0.0155029296875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 517595808, + "step": 4098 + }, + { + "epoch": 1.051497466812031, + "grad_norm": 64.05028533935547, + "learning_rate": 5e-06, + "loss": 0.861, + "num_input_tokens_seen": 517721500, + "step": 4099 + }, + { + "epoch": 1.051497466812031, + "loss": 0.9883962273597717, + "loss_ce": 0.001091520651243627, + "loss_iou": 0.45703125, + "loss_num": 0.01483154296875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 517721500, + "step": 4099 + }, + { + "epoch": 1.0517539921759764, + "grad_norm": 50.162174224853516, + "learning_rate": 5e-06, + "loss": 0.9402, + "num_input_tokens_seen": 517847684, + "step": 4100 + }, + { + "epoch": 1.0517539921759764, + "loss": 1.0948598384857178, + "loss_ce": 0.0001332151296082884, + "loss_iou": 0.498046875, + "loss_num": 0.0194091796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 517847684, + "step": 4100 + }, + { + "epoch": 1.0520105175399217, + "grad_norm": 48.528682708740234, + "learning_rate": 5e-06, + "loss": 0.8605, + "num_input_tokens_seen": 517974736, + "step": 4101 + }, + { + "epoch": 1.0520105175399217, + "loss": 1.0525346994400024, + "loss_ce": 0.0027300145011395216, + "loss_iou": 0.4765625, + "loss_num": 0.0189208984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 517974736, + "step": 4101 + }, + { + "epoch": 1.0522670429038672, + "grad_norm": 43.97685241699219, + "learning_rate": 5e-06, + "loss": 0.8982, + "num_input_tokens_seen": 518100528, + "step": 4102 + }, + { + "epoch": 1.0522670429038672, + "loss": 0.6896644830703735, + "loss_ce": 0.0006996587035246193, + "loss_iou": 0.333984375, + "loss_num": 0.00433349609375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 518100528, + "step": 4102 + }, + { + "epoch": 1.0525235682678125, + "grad_norm": 21.584407806396484, + "learning_rate": 5e-06, + "loss": 0.8191, + "num_input_tokens_seen": 518226576, + "step": 4103 + }, + { + "epoch": 1.0525235682678125, + "loss": 0.8910377621650696, + "loss_ce": 0.0004127742722630501, + "loss_iou": 0.421875, + "loss_num": 0.00946044921875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 518226576, + "step": 4103 + }, + { + "epoch": 1.0527800936317577, + "grad_norm": 16.553874969482422, + "learning_rate": 5e-06, + "loss": 0.8127, + "num_input_tokens_seen": 518354356, + "step": 4104 + }, + { + "epoch": 1.0527800936317577, + "loss": 0.6649641990661621, + "loss_ce": 0.0006575700244866312, + "loss_iou": 0.306640625, + "loss_num": 0.0101318359375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 518354356, + "step": 4104 + }, + { + "epoch": 1.0530366189957032, + "grad_norm": 22.362987518310547, + "learning_rate": 5e-06, + "loss": 0.8504, + "num_input_tokens_seen": 518480516, + "step": 4105 + }, + { + "epoch": 1.0530366189957032, + "loss": 0.722992479801178, + "loss_ce": 0.0018010535277426243, + "loss_iou": 0.337890625, + "loss_num": 0.0089111328125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 518480516, + "step": 4105 + }, + { + "epoch": 1.0532931443596485, + "grad_norm": 19.146015167236328, + "learning_rate": 5e-06, + "loss": 0.6915, + "num_input_tokens_seen": 518607312, + "step": 4106 + }, + { + "epoch": 1.0532931443596485, + "loss": 0.6571321487426758, + "loss_ce": 0.001126310438849032, + "loss_iou": 0.314453125, + "loss_num": 0.005401611328125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 518607312, + "step": 4106 + }, + { + "epoch": 1.053549669723594, + "grad_norm": 17.35715103149414, + "learning_rate": 5e-06, + "loss": 0.7744, + "num_input_tokens_seen": 518732600, + "step": 4107 + }, + { + "epoch": 1.053549669723594, + "loss": 0.8244946002960205, + "loss_ce": 0.00027583169867284596, + "loss_iou": 0.392578125, + "loss_num": 0.007659912109375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 518732600, + "step": 4107 + }, + { + "epoch": 1.0538061950875393, + "grad_norm": 37.08160400390625, + "learning_rate": 5e-06, + "loss": 0.8548, + "num_input_tokens_seen": 518859820, + "step": 4108 + }, + { + "epoch": 1.0538061950875393, + "loss": 1.0898687839508057, + "loss_ce": 0.001001559430733323, + "loss_iou": 0.490234375, + "loss_num": 0.021484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 518859820, + "step": 4108 + }, + { + "epoch": 1.0540627204514847, + "grad_norm": 48.44367218017578, + "learning_rate": 5e-06, + "loss": 0.9801, + "num_input_tokens_seen": 518986280, + "step": 4109 + }, + { + "epoch": 1.0540627204514847, + "loss": 1.0455784797668457, + "loss_ce": 0.005051194690167904, + "loss_iou": 0.4765625, + "loss_num": 0.017578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 518986280, + "step": 4109 + }, + { + "epoch": 1.05431924581543, + "grad_norm": 56.68216323852539, + "learning_rate": 5e-06, + "loss": 0.9053, + "num_input_tokens_seen": 519114532, + "step": 4110 + }, + { + "epoch": 1.05431924581543, + "loss": 1.0008835792541504, + "loss_ce": 0.002836664905771613, + "loss_iou": 0.4375, + "loss_num": 0.0244140625, + "loss_xval": 1.0, + "num_input_tokens_seen": 519114532, + "step": 4110 + }, + { + "epoch": 1.0545757711793753, + "grad_norm": 48.99155044555664, + "learning_rate": 5e-06, + "loss": 0.9159, + "num_input_tokens_seen": 519239600, + "step": 4111 + }, + { + "epoch": 1.0545757711793753, + "loss": 0.9786664247512817, + "loss_ce": 0.0011273554991930723, + "loss_iou": 0.443359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 519239600, + "step": 4111 + }, + { + "epoch": 1.0548322965433208, + "grad_norm": 34.704498291015625, + "learning_rate": 5e-06, + "loss": 0.8309, + "num_input_tokens_seen": 519366156, + "step": 4112 + }, + { + "epoch": 1.0548322965433208, + "loss": 0.7479087710380554, + "loss_ce": 0.0018150281393900514, + "loss_iou": 0.35546875, + "loss_num": 0.00689697265625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 519366156, + "step": 4112 + }, + { + "epoch": 1.055088821907266, + "grad_norm": 57.867942810058594, + "learning_rate": 5e-06, + "loss": 0.9012, + "num_input_tokens_seen": 519493528, + "step": 4113 + }, + { + "epoch": 1.055088821907266, + "loss": 1.0700764656066895, + "loss_ce": 0.0012288574362173676, + "loss_iou": 0.498046875, + "loss_num": 0.0146484375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 519493528, + "step": 4113 + }, + { + "epoch": 1.0553453472712115, + "grad_norm": 53.1524658203125, + "learning_rate": 5e-06, + "loss": 0.9169, + "num_input_tokens_seen": 519619868, + "step": 4114 + }, + { + "epoch": 1.0553453472712115, + "loss": 0.8547157049179077, + "loss_ce": 0.0012000880669802427, + "loss_iou": 0.40234375, + "loss_num": 0.00958251953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 519619868, + "step": 4114 + }, + { + "epoch": 1.0556018726351568, + "grad_norm": 38.37180709838867, + "learning_rate": 5e-06, + "loss": 0.9315, + "num_input_tokens_seen": 519746000, + "step": 4115 + }, + { + "epoch": 1.0556018726351568, + "loss": 0.9941310882568359, + "loss_ce": 0.00023456773487851024, + "loss_iou": 0.44921875, + "loss_num": 0.0189208984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 519746000, + "step": 4115 + }, + { + "epoch": 1.055858397999102, + "grad_norm": 45.686832427978516, + "learning_rate": 5e-06, + "loss": 0.9181, + "num_input_tokens_seen": 519873328, + "step": 4116 + }, + { + "epoch": 1.055858397999102, + "loss": 0.8802642822265625, + "loss_ce": 0.0003814716765191406, + "loss_iou": 0.4140625, + "loss_num": 0.0106201171875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 519873328, + "step": 4116 + }, + { + "epoch": 1.0561149233630476, + "grad_norm": 54.3358039855957, + "learning_rate": 5e-06, + "loss": 0.9846, + "num_input_tokens_seen": 520000200, + "step": 4117 + }, + { + "epoch": 1.0561149233630476, + "loss": 0.9730417132377625, + "loss_ce": 0.002338625956326723, + "loss_iou": 0.451171875, + "loss_num": 0.01361083984375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 520000200, + "step": 4117 + }, + { + "epoch": 1.0563714487269928, + "grad_norm": 46.20098114013672, + "learning_rate": 5e-06, + "loss": 0.8744, + "num_input_tokens_seen": 520126580, + "step": 4118 + }, + { + "epoch": 1.0563714487269928, + "loss": 0.9601276516914368, + "loss_ce": 0.00016670575132593513, + "loss_iou": 0.451171875, + "loss_num": 0.011474609375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 520126580, + "step": 4118 + }, + { + "epoch": 1.0566279740909383, + "grad_norm": 54.18983459472656, + "learning_rate": 5e-06, + "loss": 0.817, + "num_input_tokens_seen": 520253928, + "step": 4119 + }, + { + "epoch": 1.0566279740909383, + "loss": 0.6793273687362671, + "loss_ce": 0.0001281839795410633, + "loss_iou": 0.326171875, + "loss_num": 0.00543212890625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 520253928, + "step": 4119 + }, + { + "epoch": 1.0568844994548836, + "grad_norm": 52.147125244140625, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 520381184, + "step": 4120 + }, + { + "epoch": 1.0568844994548836, + "loss": 0.9549261331558228, + "loss_ce": 0.0013128952123224735, + "loss_iou": 0.4375, + "loss_num": 0.0155029296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 520381184, + "step": 4120 + }, + { + "epoch": 1.0571410248188289, + "grad_norm": 38.737613677978516, + "learning_rate": 5e-06, + "loss": 0.8591, + "num_input_tokens_seen": 520507784, + "step": 4121 + }, + { + "epoch": 1.0571410248188289, + "loss": 0.8146524429321289, + "loss_ce": 0.0001993473997572437, + "loss_iou": 0.388671875, + "loss_num": 0.007598876953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 520507784, + "step": 4121 + }, + { + "epoch": 1.0573975501827744, + "grad_norm": 39.63469696044922, + "learning_rate": 5e-06, + "loss": 0.9417, + "num_input_tokens_seen": 520633556, + "step": 4122 + }, + { + "epoch": 1.0573975501827744, + "loss": 1.0310333967208862, + "loss_ce": 0.0002716941526159644, + "loss_iou": 0.478515625, + "loss_num": 0.01458740234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 520633556, + "step": 4122 + }, + { + "epoch": 1.0576540755467196, + "grad_norm": 49.17890167236328, + "learning_rate": 5e-06, + "loss": 0.9953, + "num_input_tokens_seen": 520760448, + "step": 4123 + }, + { + "epoch": 1.0576540755467196, + "loss": 0.9290846586227417, + "loss_ce": 0.0018385492730885744, + "loss_iou": 0.44140625, + "loss_num": 0.00872802734375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 520760448, + "step": 4123 + }, + { + "epoch": 1.0579106009106651, + "grad_norm": 47.646339416503906, + "learning_rate": 5e-06, + "loss": 0.9867, + "num_input_tokens_seen": 520887212, + "step": 4124 + }, + { + "epoch": 1.0579106009106651, + "loss": 0.9489282369613647, + "loss_ce": 0.00019774268730543554, + "loss_iou": 0.4453125, + "loss_num": 0.01141357421875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 520887212, + "step": 4124 + }, + { + "epoch": 1.0581671262746104, + "grad_norm": 53.44550704956055, + "learning_rate": 5e-06, + "loss": 0.9041, + "num_input_tokens_seen": 521013336, + "step": 4125 + }, + { + "epoch": 1.0581671262746104, + "loss": 0.9720391631126404, + "loss_ce": 0.0018242656951770186, + "loss_iou": 0.44140625, + "loss_num": 0.01708984375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 521013336, + "step": 4125 + }, + { + "epoch": 1.0584236516385557, + "grad_norm": 52.63658142089844, + "learning_rate": 5e-06, + "loss": 0.9311, + "num_input_tokens_seen": 521140528, + "step": 4126 + }, + { + "epoch": 1.0584236516385557, + "loss": 0.8970633745193481, + "loss_ce": 0.00033487717155367136, + "loss_iou": 0.419921875, + "loss_num": 0.01116943359375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 521140528, + "step": 4126 + }, + { + "epoch": 1.0586801770025012, + "grad_norm": 47.721343994140625, + "learning_rate": 5e-06, + "loss": 0.8498, + "num_input_tokens_seen": 521266924, + "step": 4127 + }, + { + "epoch": 1.0586801770025012, + "loss": 0.7125214338302612, + "loss_ce": 0.001095662941224873, + "loss_iou": 0.33984375, + "loss_num": 0.006439208984375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 521266924, + "step": 4127 + }, + { + "epoch": 1.0589367023664464, + "grad_norm": 46.67226028442383, + "learning_rate": 5e-06, + "loss": 0.9359, + "num_input_tokens_seen": 521393340, + "step": 4128 + }, + { + "epoch": 1.0589367023664464, + "loss": 0.8005244731903076, + "loss_ce": 0.0016963677480816841, + "loss_iou": 0.373046875, + "loss_num": 0.0103759765625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 521393340, + "step": 4128 + }, + { + "epoch": 1.059193227730392, + "grad_norm": 65.14278411865234, + "learning_rate": 5e-06, + "loss": 0.9435, + "num_input_tokens_seen": 521520612, + "step": 4129 + }, + { + "epoch": 1.059193227730392, + "loss": 0.8951630592346191, + "loss_ce": 0.00014353141887113452, + "loss_iou": 0.42578125, + "loss_num": 0.00830078125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 521520612, + "step": 4129 + }, + { + "epoch": 1.0594497530943372, + "grad_norm": 49.00065612792969, + "learning_rate": 5e-06, + "loss": 0.9894, + "num_input_tokens_seen": 521647156, + "step": 4130 + }, + { + "epoch": 1.0594497530943372, + "loss": 0.7989012002944946, + "loss_ce": 0.0015379282413050532, + "loss_iou": 0.375, + "loss_num": 0.0093994140625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 521647156, + "step": 4130 + }, + { + "epoch": 1.0597062784582825, + "grad_norm": 20.526859283447266, + "learning_rate": 5e-06, + "loss": 0.7942, + "num_input_tokens_seen": 521773436, + "step": 4131 + }, + { + "epoch": 1.0597062784582825, + "loss": 0.7715187668800354, + "loss_ce": 0.001010984880849719, + "loss_iou": 0.36328125, + "loss_num": 0.00860595703125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 521773436, + "step": 4131 + }, + { + "epoch": 1.059962803822228, + "grad_norm": 24.29244041442871, + "learning_rate": 5e-06, + "loss": 0.8661, + "num_input_tokens_seen": 521900468, + "step": 4132 + }, + { + "epoch": 1.059962803822228, + "loss": 0.9118916988372803, + "loss_ce": 0.0022237400989979506, + "loss_iou": 0.421875, + "loss_num": 0.012939453125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 521900468, + "step": 4132 + }, + { + "epoch": 1.0602193291861732, + "grad_norm": 32.80291748046875, + "learning_rate": 5e-06, + "loss": 0.8012, + "num_input_tokens_seen": 522026304, + "step": 4133 + }, + { + "epoch": 1.0602193291861732, + "loss": 0.9570202827453613, + "loss_ce": 0.0016980397049337626, + "loss_iou": 0.44140625, + "loss_num": 0.014892578125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 522026304, + "step": 4133 + }, + { + "epoch": 1.0604758545501187, + "grad_norm": 36.19490051269531, + "learning_rate": 5e-06, + "loss": 0.9059, + "num_input_tokens_seen": 522151672, + "step": 4134 + }, + { + "epoch": 1.0604758545501187, + "loss": 0.8974913358688354, + "loss_ce": 0.001007014885544777, + "loss_iou": 0.41015625, + "loss_num": 0.014892578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 522151672, + "step": 4134 + }, + { + "epoch": 1.060732379914064, + "grad_norm": 50.156185150146484, + "learning_rate": 5e-06, + "loss": 0.8395, + "num_input_tokens_seen": 522278172, + "step": 4135 + }, + { + "epoch": 1.060732379914064, + "loss": 0.8507254719734192, + "loss_ce": 0.0003836360410787165, + "loss_iou": 0.392578125, + "loss_num": 0.01324462890625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 522278172, + "step": 4135 + }, + { + "epoch": 1.0609889052780093, + "grad_norm": 33.53269577026367, + "learning_rate": 5e-06, + "loss": 0.8154, + "num_input_tokens_seen": 522404528, + "step": 4136 + }, + { + "epoch": 1.0609889052780093, + "loss": 0.9657394886016846, + "loss_ce": 0.0016281688585877419, + "loss_iou": 0.44140625, + "loss_num": 0.0166015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 522404528, + "step": 4136 + }, + { + "epoch": 1.0612454306419548, + "grad_norm": 26.572359085083008, + "learning_rate": 5e-06, + "loss": 0.8722, + "num_input_tokens_seen": 522530228, + "step": 4137 + }, + { + "epoch": 1.0612454306419548, + "loss": 0.9496035575866699, + "loss_ce": 0.0011173111852258444, + "loss_iou": 0.435546875, + "loss_num": 0.015869140625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 522530228, + "step": 4137 + }, + { + "epoch": 1.0615019560059, + "grad_norm": 25.028228759765625, + "learning_rate": 5e-06, + "loss": 0.8578, + "num_input_tokens_seen": 522655744, + "step": 4138 + }, + { + "epoch": 1.0615019560059, + "loss": 0.8645809888839722, + "loss_ce": 0.0003231688169762492, + "loss_iou": 0.3984375, + "loss_num": 0.01324462890625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 522655744, + "step": 4138 + }, + { + "epoch": 1.0617584813698455, + "grad_norm": 33.5679817199707, + "learning_rate": 5e-06, + "loss": 0.7693, + "num_input_tokens_seen": 522782564, + "step": 4139 + }, + { + "epoch": 1.0617584813698455, + "loss": 0.852433979511261, + "loss_ce": 0.0008714952855370939, + "loss_iou": 0.40625, + "loss_num": 0.007568359375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 522782564, + "step": 4139 + }, + { + "epoch": 1.0620150067337908, + "grad_norm": 45.797183990478516, + "learning_rate": 5e-06, + "loss": 0.8403, + "num_input_tokens_seen": 522909160, + "step": 4140 + }, + { + "epoch": 1.0620150067337908, + "loss": 1.0257383584976196, + "loss_ce": 0.0005919242394156754, + "loss_iou": 0.4765625, + "loss_num": 0.014404296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 522909160, + "step": 4140 + }, + { + "epoch": 1.062271532097736, + "grad_norm": 43.276268005371094, + "learning_rate": 5e-06, + "loss": 0.9036, + "num_input_tokens_seen": 523035012, + "step": 4141 + }, + { + "epoch": 1.062271532097736, + "loss": 1.0055038928985596, + "loss_ce": 0.00013276573736220598, + "loss_iou": 0.439453125, + "loss_num": 0.0255126953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 523035012, + "step": 4141 + }, + { + "epoch": 1.0625280574616816, + "grad_norm": 43.09363555908203, + "learning_rate": 5e-06, + "loss": 0.7699, + "num_input_tokens_seen": 523161668, + "step": 4142 + }, + { + "epoch": 1.0625280574616816, + "loss": 0.7050259113311768, + "loss_ce": 0.00043611295404843986, + "loss_iou": 0.3359375, + "loss_num": 0.006195068359375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 523161668, + "step": 4142 + }, + { + "epoch": 1.0627845828256268, + "grad_norm": 45.93049240112305, + "learning_rate": 5e-06, + "loss": 0.8121, + "num_input_tokens_seen": 523287976, + "step": 4143 + }, + { + "epoch": 1.0627845828256268, + "loss": 1.0210237503051758, + "loss_ce": 0.0007601582910865545, + "loss_iou": 0.46875, + "loss_num": 0.0166015625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 523287976, + "step": 4143 + }, + { + "epoch": 1.0630411081895723, + "grad_norm": 47.930023193359375, + "learning_rate": 5e-06, + "loss": 0.8485, + "num_input_tokens_seen": 523415484, + "step": 4144 + }, + { + "epoch": 1.0630411081895723, + "loss": 1.0176032781600952, + "loss_ce": 0.0012458007549867034, + "loss_iou": 0.46875, + "loss_num": 0.015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 523415484, + "step": 4144 + }, + { + "epoch": 1.0632976335535176, + "grad_norm": 45.140968322753906, + "learning_rate": 5e-06, + "loss": 0.8816, + "num_input_tokens_seen": 523542004, + "step": 4145 + }, + { + "epoch": 1.0632976335535176, + "loss": 0.8869500756263733, + "loss_ce": 0.00023139460245147347, + "loss_iou": 0.42578125, + "loss_num": 0.007568359375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 523542004, + "step": 4145 + }, + { + "epoch": 1.063554158917463, + "grad_norm": 43.99235534667969, + "learning_rate": 5e-06, + "loss": 0.8253, + "num_input_tokens_seen": 523668512, + "step": 4146 + }, + { + "epoch": 1.063554158917463, + "loss": 0.8802431225776672, + "loss_ce": 0.0003602889191824943, + "loss_iou": 0.412109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 523668512, + "step": 4146 + }, + { + "epoch": 1.0638106842814083, + "grad_norm": 47.92687225341797, + "learning_rate": 5e-06, + "loss": 1.0266, + "num_input_tokens_seen": 523795536, + "step": 4147 + }, + { + "epoch": 1.0638106842814083, + "loss": 0.929524302482605, + "loss_ce": 0.0032547814771533012, + "loss_iou": 0.41796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 523795536, + "step": 4147 + }, + { + "epoch": 1.0640672096453536, + "grad_norm": 48.38391876220703, + "learning_rate": 5e-06, + "loss": 0.9275, + "num_input_tokens_seen": 523921512, + "step": 4148 + }, + { + "epoch": 1.0640672096453536, + "loss": 0.9655384421348572, + "loss_ce": 0.00045057240640744567, + "loss_iou": 0.44140625, + "loss_num": 0.0164794921875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 523921512, + "step": 4148 + }, + { + "epoch": 1.064323735009299, + "grad_norm": 72.95161437988281, + "learning_rate": 5e-06, + "loss": 0.9128, + "num_input_tokens_seen": 524047488, + "step": 4149 + }, + { + "epoch": 1.064323735009299, + "loss": 1.183854341506958, + "loss_ce": 0.00026065349811688066, + "loss_iou": 0.5390625, + "loss_num": 0.0218505859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 524047488, + "step": 4149 + }, + { + "epoch": 1.0645802603732444, + "grad_norm": 43.06047439575195, + "learning_rate": 5e-06, + "loss": 0.9277, + "num_input_tokens_seen": 524174392, + "step": 4150 + }, + { + "epoch": 1.0645802603732444, + "loss": 0.9461584091186523, + "loss_ce": 0.002310733776539564, + "loss_iou": 0.423828125, + "loss_num": 0.0194091796875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 524174392, + "step": 4150 + }, + { + "epoch": 1.0648367857371897, + "grad_norm": 43.09352493286133, + "learning_rate": 5e-06, + "loss": 0.9051, + "num_input_tokens_seen": 524300408, + "step": 4151 + }, + { + "epoch": 1.0648367857371897, + "loss": 0.9206950664520264, + "loss_ce": 0.0032146023586392403, + "loss_iou": 0.4140625, + "loss_num": 0.017578125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 524300408, + "step": 4151 + }, + { + "epoch": 1.0650933111011351, + "grad_norm": 45.6676139831543, + "learning_rate": 5e-06, + "loss": 0.77, + "num_input_tokens_seen": 524426216, + "step": 4152 + }, + { + "epoch": 1.0650933111011351, + "loss": 0.8179129362106323, + "loss_ce": 0.0002860224340111017, + "loss_iou": 0.380859375, + "loss_num": 0.01123046875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 524426216, + "step": 4152 + }, + { + "epoch": 1.0653498364650804, + "grad_norm": 39.8922119140625, + "learning_rate": 5e-06, + "loss": 0.9816, + "num_input_tokens_seen": 524551740, + "step": 4153 + }, + { + "epoch": 1.0653498364650804, + "loss": 0.9455645680427551, + "loss_ce": 0.002693519229069352, + "loss_iou": 0.435546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 524551740, + "step": 4153 + }, + { + "epoch": 1.065606361829026, + "grad_norm": 31.099653244018555, + "learning_rate": 5e-06, + "loss": 0.8065, + "num_input_tokens_seen": 524678504, + "step": 4154 + }, + { + "epoch": 1.065606361829026, + "loss": 0.7129804491996765, + "loss_ce": 0.0005780643550679088, + "loss_iou": 0.326171875, + "loss_num": 0.01190185546875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 524678504, + "step": 4154 + }, + { + "epoch": 1.0658628871929712, + "grad_norm": 37.867855072021484, + "learning_rate": 5e-06, + "loss": 0.7288, + "num_input_tokens_seen": 524805312, + "step": 4155 + }, + { + "epoch": 1.0658628871929712, + "loss": 0.8181420564651489, + "loss_ce": 0.0002709571272134781, + "loss_iou": 0.390625, + "loss_num": 0.007598876953125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 524805312, + "step": 4155 + }, + { + "epoch": 1.0661194125569167, + "grad_norm": 42.53268051147461, + "learning_rate": 5e-06, + "loss": 0.9812, + "num_input_tokens_seen": 524930884, + "step": 4156 + }, + { + "epoch": 1.0661194125569167, + "loss": 1.0750318765640259, + "loss_ce": 0.004719448275864124, + "loss_iou": 0.494140625, + "loss_num": 0.0166015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 524930884, + "step": 4156 + }, + { + "epoch": 1.066375937920862, + "grad_norm": 37.63185501098633, + "learning_rate": 5e-06, + "loss": 0.791, + "num_input_tokens_seen": 525056908, + "step": 4157 + }, + { + "epoch": 1.066375937920862, + "loss": 0.9166965484619141, + "loss_ce": 0.00019264084403403103, + "loss_iou": 0.41015625, + "loss_num": 0.019287109375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 525056908, + "step": 4157 + }, + { + "epoch": 1.0666324632848072, + "grad_norm": 50.57176208496094, + "learning_rate": 5e-06, + "loss": 0.8695, + "num_input_tokens_seen": 525182692, + "step": 4158 + }, + { + "epoch": 1.0666324632848072, + "loss": 0.8871719241142273, + "loss_ce": 0.0004531640443019569, + "loss_iou": 0.419921875, + "loss_num": 0.009033203125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 525182692, + "step": 4158 + }, + { + "epoch": 1.0668889886487527, + "grad_norm": 57.064117431640625, + "learning_rate": 5e-06, + "loss": 0.8549, + "num_input_tokens_seen": 525311112, + "step": 4159 + }, + { + "epoch": 1.0668889886487527, + "loss": 0.7742582559585571, + "loss_ce": 0.00033243917278014123, + "loss_iou": 0.36328125, + "loss_num": 0.00982666015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 525311112, + "step": 4159 + }, + { + "epoch": 1.067145514012698, + "grad_norm": 62.50679397583008, + "learning_rate": 5e-06, + "loss": 0.9899, + "num_input_tokens_seen": 525436548, + "step": 4160 + }, + { + "epoch": 1.067145514012698, + "loss": 1.0277459621429443, + "loss_ce": 0.0013787655625492334, + "loss_iou": 0.455078125, + "loss_num": 0.023193359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 525436548, + "step": 4160 + }, + { + "epoch": 1.0674020393766435, + "grad_norm": 52.06275939941406, + "learning_rate": 5e-06, + "loss": 1.012, + "num_input_tokens_seen": 525562876, + "step": 4161 + }, + { + "epoch": 1.0674020393766435, + "loss": 0.916785478591919, + "loss_ce": 0.0002815945481415838, + "loss_iou": 0.42578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 525562876, + "step": 4161 + }, + { + "epoch": 1.0676585647405887, + "grad_norm": 43.26593780517578, + "learning_rate": 5e-06, + "loss": 0.8779, + "num_input_tokens_seen": 525689436, + "step": 4162 + }, + { + "epoch": 1.0676585647405887, + "loss": 0.8051106929779053, + "loss_ce": 0.0013997521018609405, + "loss_iou": 0.380859375, + "loss_num": 0.0081787109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 525689436, + "step": 4162 + }, + { + "epoch": 1.067915090104534, + "grad_norm": 66.40210723876953, + "learning_rate": 5e-06, + "loss": 0.8496, + "num_input_tokens_seen": 525816484, + "step": 4163 + }, + { + "epoch": 1.067915090104534, + "loss": 0.7468167543411255, + "loss_ce": 0.0007230263436213136, + "loss_iou": 0.345703125, + "loss_num": 0.0107421875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 525816484, + "step": 4163 + }, + { + "epoch": 1.0681716154684795, + "grad_norm": 48.73047637939453, + "learning_rate": 5e-06, + "loss": 0.9952, + "num_input_tokens_seen": 525941296, + "step": 4164 + }, + { + "epoch": 1.0681716154684795, + "loss": 0.9476333856582642, + "loss_ce": 0.0018326574936509132, + "loss_iou": 0.443359375, + "loss_num": 0.01202392578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 525941296, + "step": 4164 + }, + { + "epoch": 1.0684281408324248, + "grad_norm": 47.21750259399414, + "learning_rate": 5e-06, + "loss": 0.8577, + "num_input_tokens_seen": 526067284, + "step": 4165 + }, + { + "epoch": 1.0684281408324248, + "loss": 1.027710199356079, + "loss_ce": 0.0008548051118850708, + "loss_iou": 0.484375, + "loss_num": 0.01129150390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 526067284, + "step": 4165 + }, + { + "epoch": 1.0686846661963703, + "grad_norm": 46.33027648925781, + "learning_rate": 5e-06, + "loss": 0.8874, + "num_input_tokens_seen": 526193248, + "step": 4166 + }, + { + "epoch": 1.0686846661963703, + "loss": 0.7645811438560486, + "loss_ce": 0.0004209933977108449, + "loss_iou": 0.357421875, + "loss_num": 0.00970458984375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 526193248, + "step": 4166 + }, + { + "epoch": 1.0689411915603155, + "grad_norm": 47.78883361816406, + "learning_rate": 5e-06, + "loss": 0.8729, + "num_input_tokens_seen": 526319480, + "step": 4167 + }, + { + "epoch": 1.0689411915603155, + "loss": 0.8846005797386169, + "loss_ce": 0.00032323168125003576, + "loss_iou": 0.41796875, + "loss_num": 0.00933837890625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 526319480, + "step": 4167 + }, + { + "epoch": 1.0691977169242608, + "grad_norm": 59.405975341796875, + "learning_rate": 5e-06, + "loss": 0.9017, + "num_input_tokens_seen": 526445624, + "step": 4168 + }, + { + "epoch": 1.0691977169242608, + "loss": 0.7328838109970093, + "loss_ce": 0.0007060596835799515, + "loss_iou": 0.345703125, + "loss_num": 0.00811767578125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 526445624, + "step": 4168 + }, + { + "epoch": 1.0694542422882063, + "grad_norm": 42.227962493896484, + "learning_rate": 5e-06, + "loss": 0.9368, + "num_input_tokens_seen": 526571100, + "step": 4169 + }, + { + "epoch": 1.0694542422882063, + "loss": 0.8257947564125061, + "loss_ce": 0.0010877142194658518, + "loss_iou": 0.390625, + "loss_num": 0.00897216796875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 526571100, + "step": 4169 + }, + { + "epoch": 1.0697107676521516, + "grad_norm": 28.14371109008789, + "learning_rate": 5e-06, + "loss": 0.7942, + "num_input_tokens_seen": 526696932, + "step": 4170 + }, + { + "epoch": 1.0697107676521516, + "loss": 0.913670539855957, + "loss_ce": 0.0013170039746910334, + "loss_iou": 0.42578125, + "loss_num": 0.01251220703125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 526696932, + "step": 4170 + }, + { + "epoch": 1.069967293016097, + "grad_norm": 48.249664306640625, + "learning_rate": 5e-06, + "loss": 0.9169, + "num_input_tokens_seen": 526824396, + "step": 4171 + }, + { + "epoch": 1.069967293016097, + "loss": 0.8387980461120605, + "loss_ce": 0.00041919059003703296, + "loss_iou": 0.388671875, + "loss_num": 0.0120849609375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 526824396, + "step": 4171 + }, + { + "epoch": 1.0702238183800423, + "grad_norm": 39.90413284301758, + "learning_rate": 5e-06, + "loss": 0.8026, + "num_input_tokens_seen": 526950580, + "step": 4172 + }, + { + "epoch": 1.0702238183800423, + "loss": 0.740874171257019, + "loss_ce": 0.0008839551592245698, + "loss_iou": 0.34765625, + "loss_num": 0.00860595703125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 526950580, + "step": 4172 + }, + { + "epoch": 1.0704803437439876, + "grad_norm": 44.372032165527344, + "learning_rate": 5e-06, + "loss": 0.8204, + "num_input_tokens_seen": 527077696, + "step": 4173 + }, + { + "epoch": 1.0704803437439876, + "loss": 0.97274249792099, + "loss_ce": 0.0015510930679738522, + "loss_iou": 0.435546875, + "loss_num": 0.020263671875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 527077696, + "step": 4173 + }, + { + "epoch": 1.070736869107933, + "grad_norm": 54.79743957519531, + "learning_rate": 5e-06, + "loss": 0.9525, + "num_input_tokens_seen": 527203196, + "step": 4174 + }, + { + "epoch": 1.070736869107933, + "loss": 1.0224778652191162, + "loss_ce": 0.001237738411873579, + "loss_iou": 0.4609375, + "loss_num": 0.01953125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 527203196, + "step": 4174 + }, + { + "epoch": 1.0709933944718784, + "grad_norm": 51.15628433227539, + "learning_rate": 5e-06, + "loss": 0.8438, + "num_input_tokens_seen": 527331152, + "step": 4175 + }, + { + "epoch": 1.0709933944718784, + "loss": 0.8993298411369324, + "loss_ce": 0.0011365215759724379, + "loss_iou": 0.412109375, + "loss_num": 0.014892578125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 527331152, + "step": 4175 + }, + { + "epoch": 1.0712499198358238, + "grad_norm": 56.01426696777344, + "learning_rate": 5e-06, + "loss": 0.9121, + "num_input_tokens_seen": 527457144, + "step": 4176 + }, + { + "epoch": 1.0712499198358238, + "loss": 0.7160322666168213, + "loss_ce": 0.00021199611364863813, + "loss_iou": 0.34765625, + "loss_num": 0.004364013671875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 527457144, + "step": 4176 + }, + { + "epoch": 1.0715064451997691, + "grad_norm": 40.7580451965332, + "learning_rate": 5e-06, + "loss": 0.8728, + "num_input_tokens_seen": 527583712, + "step": 4177 + }, + { + "epoch": 1.0715064451997691, + "loss": 0.7623565196990967, + "loss_ce": 0.0001494719326728955, + "loss_iou": 0.353515625, + "loss_num": 0.0107421875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 527583712, + "step": 4177 + }, + { + "epoch": 1.0717629705637144, + "grad_norm": 27.959163665771484, + "learning_rate": 5e-06, + "loss": 0.7653, + "num_input_tokens_seen": 527710364, + "step": 4178 + }, + { + "epoch": 1.0717629705637144, + "loss": 0.9189596176147461, + "loss_ce": 0.003432276425883174, + "loss_iou": 0.408203125, + "loss_num": 0.0201416015625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 527710364, + "step": 4178 + }, + { + "epoch": 1.0720194959276599, + "grad_norm": 35.057247161865234, + "learning_rate": 5e-06, + "loss": 0.8908, + "num_input_tokens_seen": 527836020, + "step": 4179 + }, + { + "epoch": 1.0720194959276599, + "loss": 0.9363690614700317, + "loss_ce": 0.0010663105640560389, + "loss_iou": 0.43359375, + "loss_num": 0.01397705078125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 527836020, + "step": 4179 + }, + { + "epoch": 1.0722760212916052, + "grad_norm": 42.190067291259766, + "learning_rate": 5e-06, + "loss": 0.9189, + "num_input_tokens_seen": 527962596, + "step": 4180 + }, + { + "epoch": 1.0722760212916052, + "loss": 0.8115580081939697, + "loss_ce": 0.002476019086316228, + "loss_iou": 0.3828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 527962596, + "step": 4180 + }, + { + "epoch": 1.0725325466555506, + "grad_norm": 37.59096145629883, + "learning_rate": 5e-06, + "loss": 0.8276, + "num_input_tokens_seen": 528088972, + "step": 4181 + }, + { + "epoch": 1.0725325466555506, + "loss": 0.8050222396850586, + "loss_ce": 0.00033473348594270647, + "loss_iou": 0.38671875, + "loss_num": 0.006683349609375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 528088972, + "step": 4181 + }, + { + "epoch": 1.072789072019496, + "grad_norm": 45.51539611816406, + "learning_rate": 5e-06, + "loss": 0.8164, + "num_input_tokens_seen": 528215628, + "step": 4182 + }, + { + "epoch": 1.072789072019496, + "loss": 0.8562222719192505, + "loss_ce": 0.00026527117006480694, + "loss_iou": 0.392578125, + "loss_num": 0.013916015625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 528215628, + "step": 4182 + }, + { + "epoch": 1.0730455973834412, + "grad_norm": 39.7054443359375, + "learning_rate": 5e-06, + "loss": 0.9525, + "num_input_tokens_seen": 528341552, + "step": 4183 + }, + { + "epoch": 1.0730455973834412, + "loss": 0.9826571941375732, + "loss_ce": 0.0007235925877466798, + "loss_iou": 0.443359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 528341552, + "step": 4183 + }, + { + "epoch": 1.0733021227473867, + "grad_norm": 42.57738494873047, + "learning_rate": 5e-06, + "loss": 0.8493, + "num_input_tokens_seen": 528467956, + "step": 4184 + }, + { + "epoch": 1.0733021227473867, + "loss": 0.7209435105323792, + "loss_ce": 0.0014610780635848641, + "loss_iou": 0.33203125, + "loss_num": 0.0107421875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 528467956, + "step": 4184 + }, + { + "epoch": 1.073558648111332, + "grad_norm": 41.62625503540039, + "learning_rate": 5e-06, + "loss": 0.8412, + "num_input_tokens_seen": 528594052, + "step": 4185 + }, + { + "epoch": 1.073558648111332, + "loss": 0.8481071591377258, + "loss_ce": 0.0036247444804757833, + "loss_iou": 0.388671875, + "loss_num": 0.01348876953125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 528594052, + "step": 4185 + }, + { + "epoch": 1.0738151734752774, + "grad_norm": 58.26880645751953, + "learning_rate": 5e-06, + "loss": 0.9427, + "num_input_tokens_seen": 528721424, + "step": 4186 + }, + { + "epoch": 1.0738151734752774, + "loss": 1.119249939918518, + "loss_ce": 0.0013301001163199544, + "loss_iou": 0.498046875, + "loss_num": 0.0244140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 528721424, + "step": 4186 + }, + { + "epoch": 1.0740716988392227, + "grad_norm": 48.4378776550293, + "learning_rate": 5e-06, + "loss": 0.9677, + "num_input_tokens_seen": 528848088, + "step": 4187 + }, + { + "epoch": 1.0740716988392227, + "loss": 0.7414765954017639, + "loss_ce": 0.0005098258843645453, + "loss_iou": 0.349609375, + "loss_num": 0.00860595703125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 528848088, + "step": 4187 + }, + { + "epoch": 1.074328224203168, + "grad_norm": 29.05378532409668, + "learning_rate": 5e-06, + "loss": 0.8722, + "num_input_tokens_seen": 528975016, + "step": 4188 + }, + { + "epoch": 1.074328224203168, + "loss": 0.7682508826255798, + "loss_ce": 0.0004286542534828186, + "loss_iou": 0.369140625, + "loss_num": 0.005859375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 528975016, + "step": 4188 + }, + { + "epoch": 1.0745847495671135, + "grad_norm": 37.82373046875, + "learning_rate": 5e-06, + "loss": 0.84, + "num_input_tokens_seen": 529100964, + "step": 4189 + }, + { + "epoch": 1.0745847495671135, + "loss": 0.6978469491004944, + "loss_ce": 9.303812112193555e-05, + "loss_iou": 0.333984375, + "loss_num": 0.00567626953125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 529100964, + "step": 4189 + }, + { + "epoch": 1.0748412749310587, + "grad_norm": 58.646358489990234, + "learning_rate": 5e-06, + "loss": 0.8189, + "num_input_tokens_seen": 529227284, + "step": 4190 + }, + { + "epoch": 1.0748412749310587, + "loss": 0.8101799488067627, + "loss_ce": 0.0003654654719866812, + "loss_iou": 0.37890625, + "loss_num": 0.010009765625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 529227284, + "step": 4190 + }, + { + "epoch": 1.0750978002950042, + "grad_norm": 63.41388702392578, + "learning_rate": 5e-06, + "loss": 0.8657, + "num_input_tokens_seen": 529354560, + "step": 4191 + }, + { + "epoch": 1.0750978002950042, + "loss": 0.8496758937835693, + "loss_ce": 6.65304105496034e-05, + "loss_iou": 0.400390625, + "loss_num": 0.0098876953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 529354560, + "step": 4191 + }, + { + "epoch": 1.0753543256589495, + "grad_norm": 68.31269073486328, + "learning_rate": 5e-06, + "loss": 0.8689, + "num_input_tokens_seen": 529480904, + "step": 4192 + }, + { + "epoch": 1.0753543256589495, + "loss": 0.8683198690414429, + "loss_ce": 0.00015579210594296455, + "loss_iou": 0.396484375, + "loss_num": 0.0150146484375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 529480904, + "step": 4192 + }, + { + "epoch": 1.075610851022895, + "grad_norm": 47.187068939208984, + "learning_rate": 5e-06, + "loss": 1.0213, + "num_input_tokens_seen": 529607548, + "step": 4193 + }, + { + "epoch": 1.075610851022895, + "loss": 1.0502688884735107, + "loss_ce": 0.0019290390191599727, + "loss_iou": 0.48046875, + "loss_num": 0.0179443359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 529607548, + "step": 4193 + }, + { + "epoch": 1.0758673763868403, + "grad_norm": 19.860132217407227, + "learning_rate": 5e-06, + "loss": 0.9243, + "num_input_tokens_seen": 529735288, + "step": 4194 + }, + { + "epoch": 1.0758673763868403, + "loss": 1.0263779163360596, + "loss_ce": 0.0004990854649804533, + "loss_iou": 0.462890625, + "loss_num": 0.0196533203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 529735288, + "step": 4194 + }, + { + "epoch": 1.0761239017507855, + "grad_norm": 72.54859161376953, + "learning_rate": 5e-06, + "loss": 0.8668, + "num_input_tokens_seen": 529860936, + "step": 4195 + }, + { + "epoch": 1.0761239017507855, + "loss": 1.0949292182922363, + "loss_ce": 0.002155718393623829, + "loss_iou": 0.498046875, + "loss_num": 0.018798828125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 529860936, + "step": 4195 + }, + { + "epoch": 1.076380427114731, + "grad_norm": 30.86298179626465, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 529986772, + "step": 4196 + }, + { + "epoch": 1.076380427114731, + "loss": 0.7357765436172485, + "loss_ce": 0.0004250332713127136, + "loss_iou": 0.34375, + "loss_num": 0.00927734375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 529986772, + "step": 4196 + }, + { + "epoch": 1.0766369524786763, + "grad_norm": 38.56808090209961, + "learning_rate": 5e-06, + "loss": 0.8046, + "num_input_tokens_seen": 530113344, + "step": 4197 + }, + { + "epoch": 1.0766369524786763, + "loss": 0.6771001815795898, + "loss_ce": 9.8239557701163e-05, + "loss_iou": 0.326171875, + "loss_num": 0.005096435546875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 530113344, + "step": 4197 + }, + { + "epoch": 1.0768934778426216, + "grad_norm": 33.69475555419922, + "learning_rate": 5e-06, + "loss": 0.8813, + "num_input_tokens_seen": 530238984, + "step": 4198 + }, + { + "epoch": 1.0768934778426216, + "loss": 0.9525139927864075, + "loss_ce": 0.000853819539770484, + "loss_iou": 0.427734375, + "loss_num": 0.01904296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 530238984, + "step": 4198 + }, + { + "epoch": 1.077150003206567, + "grad_norm": 32.98690414428711, + "learning_rate": 5e-06, + "loss": 0.801, + "num_input_tokens_seen": 530366188, + "step": 4199 + }, + { + "epoch": 1.077150003206567, + "loss": 0.8253825902938843, + "loss_ce": 0.00018727047427091748, + "loss_iou": 0.37890625, + "loss_num": 0.0133056640625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 530366188, + "step": 4199 + }, + { + "epoch": 1.0774065285705123, + "grad_norm": 51.6097412109375, + "learning_rate": 5e-06, + "loss": 0.8374, + "num_input_tokens_seen": 530492600, + "step": 4200 + }, + { + "epoch": 1.0774065285705123, + "loss": 0.9814297556877136, + "loss_ce": 0.00291415024548769, + "loss_iou": 0.431640625, + "loss_num": 0.0230712890625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 530492600, + "step": 4200 + }, + { + "epoch": 1.0776630539344578, + "grad_norm": 44.34135055541992, + "learning_rate": 5e-06, + "loss": 0.8513, + "num_input_tokens_seen": 530618964, + "step": 4201 + }, + { + "epoch": 1.0776630539344578, + "loss": 0.8574546575546265, + "loss_ce": 0.0019859308376908302, + "loss_iou": 0.396484375, + "loss_num": 0.01263427734375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 530618964, + "step": 4201 + }, + { + "epoch": 1.077919579298403, + "grad_norm": 32.64875793457031, + "learning_rate": 5e-06, + "loss": 0.7987, + "num_input_tokens_seen": 530745292, + "step": 4202 + }, + { + "epoch": 1.077919579298403, + "loss": 0.8369764089584351, + "loss_ce": 0.00030647614039480686, + "loss_iou": 0.380859375, + "loss_num": 0.0147705078125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 530745292, + "step": 4202 + }, + { + "epoch": 1.0781761046623486, + "grad_norm": 45.32022476196289, + "learning_rate": 5e-06, + "loss": 0.87, + "num_input_tokens_seen": 530873132, + "step": 4203 + }, + { + "epoch": 1.0781761046623486, + "loss": 1.0392160415649414, + "loss_ce": 0.0011301666963845491, + "loss_iou": 0.458984375, + "loss_num": 0.0238037109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 530873132, + "step": 4203 + }, + { + "epoch": 1.0784326300262939, + "grad_norm": 59.504520416259766, + "learning_rate": 5e-06, + "loss": 0.868, + "num_input_tokens_seen": 530999516, + "step": 4204 + }, + { + "epoch": 1.0784326300262939, + "loss": 0.8765503168106079, + "loss_ce": 0.0049683284014463425, + "loss_iou": 0.404296875, + "loss_num": 0.012451171875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 530999516, + "step": 4204 + }, + { + "epoch": 1.0786891553902391, + "grad_norm": 61.230506896972656, + "learning_rate": 5e-06, + "loss": 0.8981, + "num_input_tokens_seen": 531126468, + "step": 4205 + }, + { + "epoch": 1.0786891553902391, + "loss": 0.7520017027854919, + "loss_ce": 0.0010251434287056327, + "loss_iou": 0.36328125, + "loss_num": 0.004669189453125, + "loss_xval": 0.75, + "num_input_tokens_seen": 531126468, + "step": 4205 + }, + { + "epoch": 1.0789456807541846, + "grad_norm": 53.96475601196289, + "learning_rate": 5e-06, + "loss": 0.8537, + "num_input_tokens_seen": 531252620, + "step": 4206 + }, + { + "epoch": 1.0789456807541846, + "loss": 0.8793063163757324, + "loss_ce": 0.0013766268966719508, + "loss_iou": 0.41796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 531252620, + "step": 4206 + }, + { + "epoch": 1.07920220611813, + "grad_norm": 53.399471282958984, + "learning_rate": 5e-06, + "loss": 0.8832, + "num_input_tokens_seen": 531380108, + "step": 4207 + }, + { + "epoch": 1.07920220611813, + "loss": 0.9851667881011963, + "loss_ce": 0.0022566020488739014, + "loss_iou": 0.4453125, + "loss_num": 0.0185546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 531380108, + "step": 4207 + }, + { + "epoch": 1.0794587314820754, + "grad_norm": 55.17346954345703, + "learning_rate": 5e-06, + "loss": 0.8325, + "num_input_tokens_seen": 531507208, + "step": 4208 + }, + { + "epoch": 1.0794587314820754, + "loss": 0.8835180997848511, + "loss_ce": 0.001193856354802847, + "loss_iou": 0.4140625, + "loss_num": 0.01080322265625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 531507208, + "step": 4208 + }, + { + "epoch": 1.0797152568460207, + "grad_norm": 55.189151763916016, + "learning_rate": 5e-06, + "loss": 0.8871, + "num_input_tokens_seen": 531634352, + "step": 4209 + }, + { + "epoch": 1.0797152568460207, + "loss": 0.8817172050476074, + "loss_ce": 0.000857790932059288, + "loss_iou": 0.41015625, + "loss_num": 0.0120849609375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 531634352, + "step": 4209 + }, + { + "epoch": 1.079971782209966, + "grad_norm": 47.58444595336914, + "learning_rate": 5e-06, + "loss": 0.9878, + "num_input_tokens_seen": 531761008, + "step": 4210 + }, + { + "epoch": 1.079971782209966, + "loss": 1.1156752109527588, + "loss_ce": 0.0014174773823469877, + "loss_iou": 0.5, + "loss_num": 0.0233154296875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 531761008, + "step": 4210 + }, + { + "epoch": 1.0802283075739114, + "grad_norm": 41.86310577392578, + "learning_rate": 5e-06, + "loss": 0.8807, + "num_input_tokens_seen": 531888004, + "step": 4211 + }, + { + "epoch": 1.0802283075739114, + "loss": 0.8120375871658325, + "loss_ce": 0.001002421253360808, + "loss_iou": 0.37890625, + "loss_num": 0.01043701171875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 531888004, + "step": 4211 + }, + { + "epoch": 1.0804848329378567, + "grad_norm": 39.249576568603516, + "learning_rate": 5e-06, + "loss": 0.7756, + "num_input_tokens_seen": 532012892, + "step": 4212 + }, + { + "epoch": 1.0804848329378567, + "loss": 0.6914447546005249, + "loss_ce": 0.0032123287674039602, + "loss_iou": 0.326171875, + "loss_num": 0.00714111328125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 532012892, + "step": 4212 + }, + { + "epoch": 1.0807413583018022, + "grad_norm": 46.90755844116211, + "learning_rate": 5e-06, + "loss": 0.8608, + "num_input_tokens_seen": 532140004, + "step": 4213 + }, + { + "epoch": 1.0807413583018022, + "loss": 0.7585276365280151, + "loss_ce": 0.0012033790117129683, + "loss_iou": 0.36328125, + "loss_num": 0.005767822265625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 532140004, + "step": 4213 + }, + { + "epoch": 1.0809978836657474, + "grad_norm": 47.502777099609375, + "learning_rate": 5e-06, + "loss": 0.8863, + "num_input_tokens_seen": 532266248, + "step": 4214 + }, + { + "epoch": 1.0809978836657474, + "loss": 0.8726658225059509, + "loss_ce": 0.00010722808656282723, + "loss_iou": 0.4140625, + "loss_num": 0.00848388671875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 532266248, + "step": 4214 + }, + { + "epoch": 1.0812544090296927, + "grad_norm": 41.40102005004883, + "learning_rate": 5e-06, + "loss": 0.916, + "num_input_tokens_seen": 532393040, + "step": 4215 + }, + { + "epoch": 1.0812544090296927, + "loss": 1.0989556312561035, + "loss_ce": 0.0012993266573175788, + "loss_iou": 0.50390625, + "loss_num": 0.018310546875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 532393040, + "step": 4215 + }, + { + "epoch": 1.0815109343936382, + "grad_norm": 59.09712600708008, + "learning_rate": 5e-06, + "loss": 0.7986, + "num_input_tokens_seen": 532518856, + "step": 4216 + }, + { + "epoch": 1.0815109343936382, + "loss": 0.7000777721405029, + "loss_ce": 0.0006148414104245603, + "loss_iou": 0.3359375, + "loss_num": 0.00579833984375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 532518856, + "step": 4216 + }, + { + "epoch": 1.0817674597575835, + "grad_norm": 53.499820709228516, + "learning_rate": 5e-06, + "loss": 0.897, + "num_input_tokens_seen": 532645012, + "step": 4217 + }, + { + "epoch": 1.0817674597575835, + "loss": 0.9126287698745728, + "loss_ce": 0.0014959567924961448, + "loss_iou": 0.41015625, + "loss_num": 0.018310546875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 532645012, + "step": 4217 + }, + { + "epoch": 1.082023985121529, + "grad_norm": 33.29347610473633, + "learning_rate": 5e-06, + "loss": 0.9478, + "num_input_tokens_seen": 532771248, + "step": 4218 + }, + { + "epoch": 1.082023985121529, + "loss": 1.2001385688781738, + "loss_ce": 0.005802708677947521, + "loss_iou": 0.515625, + "loss_num": 0.0322265625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 532771248, + "step": 4218 + }, + { + "epoch": 1.0822805104854742, + "grad_norm": 35.39781188964844, + "learning_rate": 5e-06, + "loss": 0.9198, + "num_input_tokens_seen": 532898612, + "step": 4219 + }, + { + "epoch": 1.0822805104854742, + "loss": 0.885926365852356, + "loss_ce": 0.0006724612321704626, + "loss_iou": 0.412109375, + "loss_num": 0.01202392578125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 532898612, + "step": 4219 + }, + { + "epoch": 1.0825370358494195, + "grad_norm": 34.225677490234375, + "learning_rate": 5e-06, + "loss": 0.7988, + "num_input_tokens_seen": 533024504, + "step": 4220 + }, + { + "epoch": 1.0825370358494195, + "loss": 0.8996380567550659, + "loss_ce": 0.0016888619866222143, + "loss_iou": 0.419921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 533024504, + "step": 4220 + }, + { + "epoch": 1.082793561213365, + "grad_norm": 44.10832214355469, + "learning_rate": 5e-06, + "loss": 0.885, + "num_input_tokens_seen": 533151580, + "step": 4221 + }, + { + "epoch": 1.082793561213365, + "loss": 0.8218718767166138, + "loss_ce": 0.000582851585932076, + "loss_iou": 0.3828125, + "loss_num": 0.011474609375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 533151580, + "step": 4221 + }, + { + "epoch": 1.0830500865773103, + "grad_norm": 46.55870819091797, + "learning_rate": 5e-06, + "loss": 0.8533, + "num_input_tokens_seen": 533277624, + "step": 4222 + }, + { + "epoch": 1.0830500865773103, + "loss": 1.001863718032837, + "loss_ce": 0.0006430696230381727, + "loss_iou": 0.45703125, + "loss_num": 0.0174560546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 533277624, + "step": 4222 + }, + { + "epoch": 1.0833066119412558, + "grad_norm": 43.02298355102539, + "learning_rate": 5e-06, + "loss": 0.8653, + "num_input_tokens_seen": 533404032, + "step": 4223 + }, + { + "epoch": 1.0833066119412558, + "loss": 1.0847413539886475, + "loss_ce": 0.002221813192591071, + "loss_iou": 0.49609375, + "loss_num": 0.0184326171875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 533404032, + "step": 4223 + }, + { + "epoch": 1.083563137305201, + "grad_norm": 43.85771179199219, + "learning_rate": 5e-06, + "loss": 0.7463, + "num_input_tokens_seen": 533530080, + "step": 4224 + }, + { + "epoch": 1.083563137305201, + "loss": 0.7807549834251404, + "loss_ce": 0.00048152607632800937, + "loss_iou": 0.361328125, + "loss_num": 0.01190185546875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 533530080, + "step": 4224 + }, + { + "epoch": 1.0838196626691463, + "grad_norm": 46.588233947753906, + "learning_rate": 5e-06, + "loss": 0.8319, + "num_input_tokens_seen": 533657120, + "step": 4225 + }, + { + "epoch": 1.0838196626691463, + "loss": 0.7868473529815674, + "loss_ce": 0.00022626503778155893, + "loss_iou": 0.37890625, + "loss_num": 0.00543212890625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 533657120, + "step": 4225 + }, + { + "epoch": 1.0840761880330918, + "grad_norm": 42.978641510009766, + "learning_rate": 5e-06, + "loss": 0.8432, + "num_input_tokens_seen": 533783784, + "step": 4226 + }, + { + "epoch": 1.0840761880330918, + "loss": 0.8225687742233276, + "loss_ce": 0.0007914521847851574, + "loss_iou": 0.38671875, + "loss_num": 0.009521484375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 533783784, + "step": 4226 + }, + { + "epoch": 1.084332713397037, + "grad_norm": 44.81273651123047, + "learning_rate": 5e-06, + "loss": 0.9412, + "num_input_tokens_seen": 533911388, + "step": 4227 + }, + { + "epoch": 1.084332713397037, + "loss": 1.2181427478790283, + "loss_ce": 0.0013458174653351307, + "loss_iou": 0.5546875, + "loss_num": 0.022216796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 533911388, + "step": 4227 + }, + { + "epoch": 1.0845892387609826, + "grad_norm": 54.927406311035156, + "learning_rate": 5e-06, + "loss": 0.7777, + "num_input_tokens_seen": 534039472, + "step": 4228 + }, + { + "epoch": 1.0845892387609826, + "loss": 0.8690335750579834, + "loss_ce": 0.0003812081413343549, + "loss_iou": 0.419921875, + "loss_num": 0.00579833984375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 534039472, + "step": 4228 + }, + { + "epoch": 1.0848457641249278, + "grad_norm": 53.92333221435547, + "learning_rate": 5e-06, + "loss": 1.0151, + "num_input_tokens_seen": 534167032, + "step": 4229 + }, + { + "epoch": 1.0848457641249278, + "loss": 0.8962100744247437, + "loss_ce": 0.0011905742576345801, + "loss_iou": 0.41015625, + "loss_num": 0.0146484375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 534167032, + "step": 4229 + }, + { + "epoch": 1.0851022894888733, + "grad_norm": 57.178504943847656, + "learning_rate": 5e-06, + "loss": 0.983, + "num_input_tokens_seen": 534293676, + "step": 4230 + }, + { + "epoch": 1.0851022894888733, + "loss": 1.1175835132598877, + "loss_ce": 0.0018608798272907734, + "loss_iou": 0.5, + "loss_num": 0.02294921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 534293676, + "step": 4230 + }, + { + "epoch": 1.0853588148528186, + "grad_norm": 50.4942741394043, + "learning_rate": 5e-06, + "loss": 0.8058, + "num_input_tokens_seen": 534419448, + "step": 4231 + }, + { + "epoch": 1.0853588148528186, + "loss": 0.9174057245254517, + "loss_ce": 0.0033432317432016134, + "loss_iou": 0.41015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 534419448, + "step": 4231 + }, + { + "epoch": 1.0856153402167639, + "grad_norm": 28.80629539489746, + "learning_rate": 5e-06, + "loss": 0.8629, + "num_input_tokens_seen": 534545564, + "step": 4232 + }, + { + "epoch": 1.0856153402167639, + "loss": 0.8702431917190552, + "loss_ce": 0.0025673885829746723, + "loss_iou": 0.404296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 534545564, + "step": 4232 + }, + { + "epoch": 1.0858718655807094, + "grad_norm": 31.324724197387695, + "learning_rate": 5e-06, + "loss": 0.926, + "num_input_tokens_seen": 534672808, + "step": 4233 + }, + { + "epoch": 1.0858718655807094, + "loss": 0.8496278524398804, + "loss_ce": 0.0022157507482916117, + "loss_iou": 0.38671875, + "loss_num": 0.0145263671875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 534672808, + "step": 4233 + }, + { + "epoch": 1.0861283909446546, + "grad_norm": 33.96489334106445, + "learning_rate": 5e-06, + "loss": 0.8165, + "num_input_tokens_seen": 534800392, + "step": 4234 + }, + { + "epoch": 1.0861283909446546, + "loss": 0.8228338956832886, + "loss_ce": 7.999742229003459e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 534800392, + "step": 4234 + }, + { + "epoch": 1.0863849163086, + "grad_norm": 38.15224075317383, + "learning_rate": 5e-06, + "loss": 0.8775, + "num_input_tokens_seen": 534926524, + "step": 4235 + }, + { + "epoch": 1.0863849163086, + "loss": 0.9135920405387878, + "loss_ce": 0.00026199675630778074, + "loss_iou": 0.423828125, + "loss_num": 0.013427734375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 534926524, + "step": 4235 + }, + { + "epoch": 1.0866414416725454, + "grad_norm": 53.63994216918945, + "learning_rate": 5e-06, + "loss": 0.8408, + "num_input_tokens_seen": 535054332, + "step": 4236 + }, + { + "epoch": 1.0866414416725454, + "loss": 0.7420462369918823, + "loss_ce": 0.00034697563387453556, + "loss_iou": 0.359375, + "loss_num": 0.004974365234375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 535054332, + "step": 4236 + }, + { + "epoch": 1.0868979670364907, + "grad_norm": 46.743919372558594, + "learning_rate": 5e-06, + "loss": 0.9626, + "num_input_tokens_seen": 535180964, + "step": 4237 + }, + { + "epoch": 1.0868979670364907, + "loss": 0.9385435581207275, + "loss_ce": 0.0005552737857215106, + "loss_iou": 0.435546875, + "loss_num": 0.01336669921875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 535180964, + "step": 4237 + }, + { + "epoch": 1.0871544924004362, + "grad_norm": 357.4437255859375, + "learning_rate": 5e-06, + "loss": 0.8425, + "num_input_tokens_seen": 535307400, + "step": 4238 + }, + { + "epoch": 1.0871544924004362, + "loss": 0.9378539323806763, + "loss_ce": 0.0008422537357546389, + "loss_iou": 0.43359375, + "loss_num": 0.01385498046875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 535307400, + "step": 4238 + }, + { + "epoch": 1.0874110177643814, + "grad_norm": 43.215606689453125, + "learning_rate": 5e-06, + "loss": 0.8641, + "num_input_tokens_seen": 535434540, + "step": 4239 + }, + { + "epoch": 1.0874110177643814, + "loss": 0.7945486307144165, + "loss_ce": 0.0015798923559486866, + "loss_iou": 0.373046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 535434540, + "step": 4239 + }, + { + "epoch": 1.087667543128327, + "grad_norm": 40.43342208862305, + "learning_rate": 5e-06, + "loss": 0.7766, + "num_input_tokens_seen": 535562004, + "step": 4240 + }, + { + "epoch": 1.087667543128327, + "loss": 0.7647783756256104, + "loss_ce": 0.0006181845092214644, + "loss_iou": 0.353515625, + "loss_num": 0.01177978515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 535562004, + "step": 4240 + }, + { + "epoch": 1.0879240684922722, + "grad_norm": 40.38063049316406, + "learning_rate": 5e-06, + "loss": 0.9133, + "num_input_tokens_seen": 535688672, + "step": 4241 + }, + { + "epoch": 1.0879240684922722, + "loss": 1.1253987550735474, + "loss_ce": 0.0008870072197169065, + "loss_iou": 0.515625, + "loss_num": 0.019287109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 535688672, + "step": 4241 + }, + { + "epoch": 1.0881805938562175, + "grad_norm": 37.272422790527344, + "learning_rate": 5e-06, + "loss": 0.8324, + "num_input_tokens_seen": 535814516, + "step": 4242 + }, + { + "epoch": 1.0881805938562175, + "loss": 0.7911187410354614, + "loss_ce": 0.00010313833627151325, + "loss_iou": 0.365234375, + "loss_num": 0.01220703125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 535814516, + "step": 4242 + }, + { + "epoch": 1.088437119220163, + "grad_norm": 42.66666793823242, + "learning_rate": 5e-06, + "loss": 0.8733, + "num_input_tokens_seen": 535941108, + "step": 4243 + }, + { + "epoch": 1.088437119220163, + "loss": 0.9083069562911987, + "loss_ce": 0.0010803421027958393, + "loss_iou": 0.42578125, + "loss_num": 0.01129150390625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 535941108, + "step": 4243 + }, + { + "epoch": 1.0886936445841082, + "grad_norm": 53.776206970214844, + "learning_rate": 5e-06, + "loss": 0.8283, + "num_input_tokens_seen": 536066972, + "step": 4244 + }, + { + "epoch": 1.0886936445841082, + "loss": 0.807137131690979, + "loss_ce": 0.0007406857330352068, + "loss_iou": 0.376953125, + "loss_num": 0.0108642578125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 536066972, + "step": 4244 + }, + { + "epoch": 1.0889501699480537, + "grad_norm": 50.006229400634766, + "learning_rate": 5e-06, + "loss": 0.8946, + "num_input_tokens_seen": 536193364, + "step": 4245 + }, + { + "epoch": 1.0889501699480537, + "loss": 0.7244573831558228, + "loss_ce": 9.214598685503006e-05, + "loss_iou": 0.345703125, + "loss_num": 0.006988525390625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 536193364, + "step": 4245 + }, + { + "epoch": 1.089206695311999, + "grad_norm": 54.94211196899414, + "learning_rate": 5e-06, + "loss": 0.8253, + "num_input_tokens_seen": 536318772, + "step": 4246 + }, + { + "epoch": 1.089206695311999, + "loss": 0.9607019424438477, + "loss_ce": 0.003182359039783478, + "loss_iou": 0.44140625, + "loss_num": 0.01513671875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 536318772, + "step": 4246 + }, + { + "epoch": 1.0894632206759443, + "grad_norm": 56.43708038330078, + "learning_rate": 5e-06, + "loss": 0.861, + "num_input_tokens_seen": 536446540, + "step": 4247 + }, + { + "epoch": 1.0894632206759443, + "loss": 0.6866933107376099, + "loss_ce": 0.00041404360672459006, + "loss_iou": 0.326171875, + "loss_num": 0.006805419921875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 536446540, + "step": 4247 + }, + { + "epoch": 1.0897197460398897, + "grad_norm": 52.314186096191406, + "learning_rate": 5e-06, + "loss": 0.7136, + "num_input_tokens_seen": 536572444, + "step": 4248 + }, + { + "epoch": 1.0897197460398897, + "loss": 0.7623040080070496, + "loss_ce": 0.0005852805334143341, + "loss_iou": 0.3671875, + "loss_num": 0.005859375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 536572444, + "step": 4248 + }, + { + "epoch": 1.089976271403835, + "grad_norm": 44.573787689208984, + "learning_rate": 5e-06, + "loss": 0.8552, + "num_input_tokens_seen": 536698844, + "step": 4249 + }, + { + "epoch": 1.089976271403835, + "loss": 0.8431373834609985, + "loss_ce": 0.00011988347250735387, + "loss_iou": 0.40234375, + "loss_num": 0.00787353515625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 536698844, + "step": 4249 + }, + { + "epoch": 1.0902327967677805, + "grad_norm": 47.36097717285156, + "learning_rate": 5e-06, + "loss": 0.8683, + "num_input_tokens_seen": 536825940, + "step": 4250 + }, + { + "epoch": 1.0902327967677805, + "eval_icons_CIoU": 0.3023005574941635, + "eval_icons_GIoU": 0.2556134760379791, + "eval_icons_IoU": 0.4722231328487396, + "eval_icons_MAE_all": 0.028205308131873608, + "eval_icons_MAE_h": 0.03800942189991474, + "eval_icons_MAE_w": 0.05544603429734707, + "eval_icons_MAE_x_boxes": 0.05080600455403328, + "eval_icons_MAE_y_boxes": 0.0376229602843523, + "eval_icons_NUM_probability": 0.9998997449874878, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 1.5769168138504028, + "eval_icons_loss_ce": 3.709676275320817e-05, + "eval_icons_loss_iou": 0.69952392578125, + "eval_icons_loss_num": 0.02767181396484375, + "eval_icons_loss_xval": 1.5380859375, + "eval_icons_runtime": 44.6499, + "eval_icons_samples_per_second": 1.12, + "eval_icons_steps_per_second": 0.045, + "num_input_tokens_seen": 536825940, + "step": 4250 + }, + { + "epoch": 1.0902327967677805, + "eval_screenspot_CIoU": 0.11266590903202693, + "eval_screenspot_GIoU": 0.09308384358882904, + "eval_screenspot_IoU": 0.29253459970156354, + "eval_screenspot_MAE_all": 0.07968296110630035, + "eval_screenspot_MAE_h": 0.07943055654565494, + "eval_screenspot_MAE_w": 0.12661905586719513, + "eval_screenspot_MAE_x_boxes": 0.10553709914286931, + "eval_screenspot_MAE_y_boxes": 0.06493873397509257, + "eval_screenspot_NUM_probability": 0.9999548594156901, + "eval_screenspot_inside_bbox": 0.6016666690508524, + "eval_screenspot_loss": 2.2541990280151367, + "eval_screenspot_loss_ce": 0.0033002846563855806, + "eval_screenspot_loss_iou": 0.9331868489583334, + "eval_screenspot_loss_num": 0.08353678385416667, + "eval_screenspot_loss_xval": 2.2835286458333335, + "eval_screenspot_runtime": 77.2232, + "eval_screenspot_samples_per_second": 1.153, + "eval_screenspot_steps_per_second": 0.039, + "num_input_tokens_seen": 536825940, + "step": 4250 + }, + { + "epoch": 1.0902327967677805, + "loss": 2.283000946044922, + "loss_ce": 0.0017511313781142235, + "loss_iou": 0.9453125, + "loss_num": 0.078125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 536825940, + "step": 4250 + }, + { + "epoch": 1.0904893221317258, + "grad_norm": 50.12983703613281, + "learning_rate": 5e-06, + "loss": 1.029, + "num_input_tokens_seen": 536953460, + "step": 4251 + }, + { + "epoch": 1.0904893221317258, + "loss": 1.083077311515808, + "loss_ce": 0.002022591419517994, + "loss_iou": 0.4921875, + "loss_num": 0.01953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 536953460, + "step": 4251 + }, + { + "epoch": 1.090745847495671, + "grad_norm": 49.1562385559082, + "learning_rate": 5e-06, + "loss": 0.8907, + "num_input_tokens_seen": 537080272, + "step": 4252 + }, + { + "epoch": 1.090745847495671, + "loss": 0.7602747678756714, + "loss_ce": 0.0005091187776997685, + "loss_iou": 0.359375, + "loss_num": 0.00836181640625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 537080272, + "step": 4252 + }, + { + "epoch": 1.0910023728596165, + "grad_norm": 53.04816436767578, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 537206992, + "step": 4253 + }, + { + "epoch": 1.0910023728596165, + "loss": 0.8083138465881348, + "loss_ce": 0.00020841433433815837, + "loss_iou": 0.380859375, + "loss_num": 0.00921630859375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 537206992, + "step": 4253 + }, + { + "epoch": 1.0912588982235618, + "grad_norm": 49.52168273925781, + "learning_rate": 5e-06, + "loss": 1.0393, + "num_input_tokens_seen": 537332292, + "step": 4254 + }, + { + "epoch": 1.0912588982235618, + "loss": 1.0582189559936523, + "loss_ce": 0.003287315834313631, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 537332292, + "step": 4254 + }, + { + "epoch": 1.0915154235875073, + "grad_norm": 40.197608947753906, + "learning_rate": 5e-06, + "loss": 0.8552, + "num_input_tokens_seen": 537458684, + "step": 4255 + }, + { + "epoch": 1.0915154235875073, + "loss": 0.8571491241455078, + "loss_ce": 0.003633481217548251, + "loss_iou": 0.392578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 537458684, + "step": 4255 + }, + { + "epoch": 1.0917719489514526, + "grad_norm": 61.588470458984375, + "learning_rate": 5e-06, + "loss": 0.8977, + "num_input_tokens_seen": 537585544, + "step": 4256 + }, + { + "epoch": 1.0917719489514526, + "loss": 1.0696187019348145, + "loss_ce": 0.0012592999264597893, + "loss_iou": 0.4765625, + "loss_num": 0.023193359375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 537585544, + "step": 4256 + }, + { + "epoch": 1.0920284743153978, + "grad_norm": 77.4262466430664, + "learning_rate": 5e-06, + "loss": 1.0143, + "num_input_tokens_seen": 537712696, + "step": 4257 + }, + { + "epoch": 1.0920284743153978, + "loss": 0.983360230922699, + "loss_ce": 0.0019149334402754903, + "loss_iou": 0.447265625, + "loss_num": 0.017333984375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 537712696, + "step": 4257 + }, + { + "epoch": 1.0922849996793433, + "grad_norm": 46.227577209472656, + "learning_rate": 5e-06, + "loss": 0.851, + "num_input_tokens_seen": 537839712, + "step": 4258 + }, + { + "epoch": 1.0922849996793433, + "loss": 0.8714098930358887, + "loss_ce": 0.0017809381242841482, + "loss_iou": 0.41015625, + "loss_num": 0.01031494140625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 537839712, + "step": 4258 + }, + { + "epoch": 1.0925415250432886, + "grad_norm": 34.097782135009766, + "learning_rate": 5e-06, + "loss": 0.8491, + "num_input_tokens_seen": 537966540, + "step": 4259 + }, + { + "epoch": 1.0925415250432886, + "loss": 0.9689818620681763, + "loss_ce": 0.003405759809538722, + "loss_iou": 0.4375, + "loss_num": 0.018310546875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 537966540, + "step": 4259 + }, + { + "epoch": 1.092798050407234, + "grad_norm": 49.35053634643555, + "learning_rate": 5e-06, + "loss": 0.8634, + "num_input_tokens_seen": 538093360, + "step": 4260 + }, + { + "epoch": 1.092798050407234, + "loss": 0.6868743896484375, + "loss_ce": 0.0003509339294396341, + "loss_iou": 0.33203125, + "loss_num": 0.00445556640625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 538093360, + "step": 4260 + }, + { + "epoch": 1.0930545757711794, + "grad_norm": 46.145225524902344, + "learning_rate": 5e-06, + "loss": 0.821, + "num_input_tokens_seen": 538219876, + "step": 4261 + }, + { + "epoch": 1.0930545757711794, + "loss": 0.7840884923934937, + "loss_ce": 0.000397077645175159, + "loss_iou": 0.37890625, + "loss_num": 0.005218505859375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 538219876, + "step": 4261 + }, + { + "epoch": 1.0933111011351246, + "grad_norm": 54.58528518676758, + "learning_rate": 5e-06, + "loss": 0.8393, + "num_input_tokens_seen": 538346804, + "step": 4262 + }, + { + "epoch": 1.0933111011351246, + "loss": 0.8000120520591736, + "loss_ce": 0.0011838998179882765, + "loss_iou": 0.384765625, + "loss_num": 0.006011962890625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 538346804, + "step": 4262 + }, + { + "epoch": 1.0935676264990701, + "grad_norm": 51.060523986816406, + "learning_rate": 5e-06, + "loss": 0.8966, + "num_input_tokens_seen": 538472808, + "step": 4263 + }, + { + "epoch": 1.0935676264990701, + "loss": 0.923377275466919, + "loss_ce": 0.0019906063098460436, + "loss_iou": 0.435546875, + "loss_num": 0.01019287109375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 538472808, + "step": 4263 + }, + { + "epoch": 1.0938241518630154, + "grad_norm": 55.52627944946289, + "learning_rate": 5e-06, + "loss": 0.8305, + "num_input_tokens_seen": 538598704, + "step": 4264 + }, + { + "epoch": 1.0938241518630154, + "loss": 0.9450574517250061, + "loss_ce": 0.0026745933573693037, + "loss_iou": 0.439453125, + "loss_num": 0.01239013671875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 538598704, + "step": 4264 + }, + { + "epoch": 1.094080677226961, + "grad_norm": 50.153961181640625, + "learning_rate": 5e-06, + "loss": 0.944, + "num_input_tokens_seen": 538724636, + "step": 4265 + }, + { + "epoch": 1.094080677226961, + "loss": 0.8521740436553955, + "loss_ce": 0.007447536569088697, + "loss_iou": 0.3984375, + "loss_num": 0.009765625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 538724636, + "step": 4265 + }, + { + "epoch": 1.0943372025909062, + "grad_norm": 49.00498580932617, + "learning_rate": 5e-06, + "loss": 0.8281, + "num_input_tokens_seen": 538850212, + "step": 4266 + }, + { + "epoch": 1.0943372025909062, + "loss": 0.7847800254821777, + "loss_ce": 0.000600374536588788, + "loss_iou": 0.37109375, + "loss_num": 0.00836181640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 538850212, + "step": 4266 + }, + { + "epoch": 1.0945937279548514, + "grad_norm": 18.410188674926758, + "learning_rate": 5e-06, + "loss": 0.7488, + "num_input_tokens_seen": 538976580, + "step": 4267 + }, + { + "epoch": 1.0945937279548514, + "loss": 0.8457469940185547, + "loss_ce": 0.000776328903157264, + "loss_iou": 0.392578125, + "loss_num": 0.01171875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 538976580, + "step": 4267 + }, + { + "epoch": 1.094850253318797, + "grad_norm": 24.589529037475586, + "learning_rate": 5e-06, + "loss": 0.8897, + "num_input_tokens_seen": 539102592, + "step": 4268 + }, + { + "epoch": 1.094850253318797, + "loss": 0.6910320520401001, + "loss_ce": 0.0020671954844146967, + "loss_iou": 0.31640625, + "loss_num": 0.01116943359375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 539102592, + "step": 4268 + }, + { + "epoch": 1.0951067786827422, + "grad_norm": 28.972549438476562, + "learning_rate": 5e-06, + "loss": 0.8679, + "num_input_tokens_seen": 539227848, + "step": 4269 + }, + { + "epoch": 1.0951067786827422, + "loss": 0.779083251953125, + "loss_ce": 0.0017394808819517493, + "loss_iou": 0.357421875, + "loss_num": 0.0120849609375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 539227848, + "step": 4269 + }, + { + "epoch": 1.0953633040466877, + "grad_norm": 31.57301902770996, + "learning_rate": 5e-06, + "loss": 0.8545, + "num_input_tokens_seen": 539354052, + "step": 4270 + }, + { + "epoch": 1.0953633040466877, + "loss": 0.7170563340187073, + "loss_ce": 0.00025950040435418487, + "loss_iou": 0.330078125, + "loss_num": 0.01129150390625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 539354052, + "step": 4270 + }, + { + "epoch": 1.095619829410633, + "grad_norm": 31.407758712768555, + "learning_rate": 5e-06, + "loss": 0.8916, + "num_input_tokens_seen": 539479636, + "step": 4271 + }, + { + "epoch": 1.095619829410633, + "loss": 0.9890812635421753, + "loss_ce": 0.0025089557748287916, + "loss_iou": 0.42578125, + "loss_num": 0.027099609375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 539479636, + "step": 4271 + }, + { + "epoch": 1.0958763547745782, + "grad_norm": 37.442626953125, + "learning_rate": 5e-06, + "loss": 0.8417, + "num_input_tokens_seen": 539605992, + "step": 4272 + }, + { + "epoch": 1.0958763547745782, + "loss": 0.6854358911514282, + "loss_ce": 0.0006214665481820703, + "loss_iou": 0.322265625, + "loss_num": 0.007659912109375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 539605992, + "step": 4272 + }, + { + "epoch": 1.0961328801385237, + "grad_norm": 39.12180709838867, + "learning_rate": 5e-06, + "loss": 1.0539, + "num_input_tokens_seen": 539731668, + "step": 4273 + }, + { + "epoch": 1.0961328801385237, + "loss": 0.7987058758735657, + "loss_ce": 0.0003660315414890647, + "loss_iou": 0.36328125, + "loss_num": 0.01446533203125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 539731668, + "step": 4273 + }, + { + "epoch": 1.096389405502469, + "grad_norm": 47.7686882019043, + "learning_rate": 5e-06, + "loss": 0.8861, + "num_input_tokens_seen": 539857632, + "step": 4274 + }, + { + "epoch": 1.096389405502469, + "loss": 0.9397404193878174, + "loss_ce": 0.0012638181215152144, + "loss_iou": 0.42578125, + "loss_num": 0.017333984375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 539857632, + "step": 4274 + }, + { + "epoch": 1.0966459308664145, + "grad_norm": 37.07966613769531, + "learning_rate": 5e-06, + "loss": 0.9588, + "num_input_tokens_seen": 539983688, + "step": 4275 + }, + { + "epoch": 1.0966459308664145, + "loss": 0.9746381044387817, + "loss_ce": 0.0005169447977095842, + "loss_iou": 0.4453125, + "loss_num": 0.0164794921875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 539983688, + "step": 4275 + }, + { + "epoch": 1.0969024562303598, + "grad_norm": 22.157163619995117, + "learning_rate": 5e-06, + "loss": 0.8029, + "num_input_tokens_seen": 540110124, + "step": 4276 + }, + { + "epoch": 1.0969024562303598, + "loss": 0.9074290990829468, + "loss_ce": 0.0023997726384550333, + "loss_iou": 0.40625, + "loss_num": 0.018310546875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 540110124, + "step": 4276 + }, + { + "epoch": 1.0971589815943052, + "grad_norm": 24.439579010009766, + "learning_rate": 5e-06, + "loss": 0.9176, + "num_input_tokens_seen": 540235712, + "step": 4277 + }, + { + "epoch": 1.0971589815943052, + "loss": 0.9007980823516846, + "loss_ce": 0.0011398524511605501, + "loss_iou": 0.41796875, + "loss_num": 0.012939453125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 540235712, + "step": 4277 + }, + { + "epoch": 1.0974155069582505, + "grad_norm": 31.011384963989258, + "learning_rate": 5e-06, + "loss": 0.8186, + "num_input_tokens_seen": 540362596, + "step": 4278 + }, + { + "epoch": 1.0974155069582505, + "loss": 0.7741881608963013, + "loss_ce": 0.0017271911492571235, + "loss_iou": 0.3671875, + "loss_num": 0.00726318359375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 540362596, + "step": 4278 + }, + { + "epoch": 1.0976720323221958, + "grad_norm": 30.156375885009766, + "learning_rate": 5e-06, + "loss": 0.9196, + "num_input_tokens_seen": 540488564, + "step": 4279 + }, + { + "epoch": 1.0976720323221958, + "loss": 0.7317163944244385, + "loss_ce": 0.00027108192443847656, + "loss_iou": 0.345703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 540488564, + "step": 4279 + }, + { + "epoch": 1.0979285576861413, + "grad_norm": 21.90422248840332, + "learning_rate": 5e-06, + "loss": 0.8118, + "num_input_tokens_seen": 540613576, + "step": 4280 + }, + { + "epoch": 1.0979285576861413, + "loss": 0.8804684281349182, + "loss_ce": 0.0032711708918213844, + "loss_iou": 0.404296875, + "loss_num": 0.01385498046875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 540613576, + "step": 4280 + }, + { + "epoch": 1.0981850830500866, + "grad_norm": 29.854904174804688, + "learning_rate": 5e-06, + "loss": 0.9381, + "num_input_tokens_seen": 540740020, + "step": 4281 + }, + { + "epoch": 1.0981850830500866, + "loss": 0.7842676043510437, + "loss_ce": 0.0003320628893561661, + "loss_iou": 0.369140625, + "loss_num": 0.0096435546875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 540740020, + "step": 4281 + }, + { + "epoch": 1.0984416084140318, + "grad_norm": 39.7161750793457, + "learning_rate": 5e-06, + "loss": 0.8353, + "num_input_tokens_seen": 540866572, + "step": 4282 + }, + { + "epoch": 1.0984416084140318, + "loss": 0.8786364197731018, + "loss_ce": 0.006566146854311228, + "loss_iou": 0.400390625, + "loss_num": 0.01422119140625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 540866572, + "step": 4282 + }, + { + "epoch": 1.0986981337779773, + "grad_norm": 43.448307037353516, + "learning_rate": 5e-06, + "loss": 0.8013, + "num_input_tokens_seen": 540991756, + "step": 4283 + }, + { + "epoch": 1.0986981337779773, + "loss": 0.8713977336883545, + "loss_ce": 0.0025012330152094364, + "loss_iou": 0.404296875, + "loss_num": 0.0120849609375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 540991756, + "step": 4283 + }, + { + "epoch": 1.0989546591419226, + "grad_norm": 60.8759651184082, + "learning_rate": 5e-06, + "loss": 0.8596, + "num_input_tokens_seen": 541117180, + "step": 4284 + }, + { + "epoch": 1.0989546591419226, + "loss": 0.8601726293563843, + "loss_ce": 0.0005534642841666937, + "loss_iou": 0.400390625, + "loss_num": 0.01153564453125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 541117180, + "step": 4284 + }, + { + "epoch": 1.099211184505868, + "grad_norm": 40.30632781982422, + "learning_rate": 5e-06, + "loss": 0.9197, + "num_input_tokens_seen": 541243388, + "step": 4285 + }, + { + "epoch": 1.099211184505868, + "loss": 0.9647684693336487, + "loss_ce": 0.0013895528391003609, + "loss_iou": 0.427734375, + "loss_num": 0.021728515625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 541243388, + "step": 4285 + }, + { + "epoch": 1.0994677098698133, + "grad_norm": 29.083728790283203, + "learning_rate": 5e-06, + "loss": 0.8316, + "num_input_tokens_seen": 541368884, + "step": 4286 + }, + { + "epoch": 1.0994677098698133, + "loss": 1.078627109527588, + "loss_ce": 0.0014786063693463802, + "loss_iou": 0.474609375, + "loss_num": 0.0260009765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 541368884, + "step": 4286 + }, + { + "epoch": 1.0997242352337588, + "grad_norm": 33.91740036010742, + "learning_rate": 5e-06, + "loss": 0.9254, + "num_input_tokens_seen": 541495028, + "step": 4287 + }, + { + "epoch": 1.0997242352337588, + "loss": 0.7508026361465454, + "loss_ce": 0.0003143171197734773, + "loss_iou": 0.35546875, + "loss_num": 0.007476806640625, + "loss_xval": 0.75, + "num_input_tokens_seen": 541495028, + "step": 4287 + }, + { + "epoch": 1.099980760597704, + "grad_norm": 70.21348571777344, + "learning_rate": 5e-06, + "loss": 0.9061, + "num_input_tokens_seen": 541622888, + "step": 4288 + }, + { + "epoch": 1.099980760597704, + "loss": 0.8699834942817688, + "loss_ce": 0.0008428778382949531, + "loss_iou": 0.400390625, + "loss_num": 0.0137939453125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 541622888, + "step": 4288 + }, + { + "epoch": 1.1002372859616494, + "grad_norm": 53.37354278564453, + "learning_rate": 5e-06, + "loss": 0.9855, + "num_input_tokens_seen": 541749388, + "step": 4289 + }, + { + "epoch": 1.1002372859616494, + "loss": 1.0263888835906982, + "loss_ce": 0.00026589585468173027, + "loss_iou": 0.45703125, + "loss_num": 0.022216796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 541749388, + "step": 4289 + }, + { + "epoch": 1.1004938113255949, + "grad_norm": 28.36046028137207, + "learning_rate": 5e-06, + "loss": 0.9164, + "num_input_tokens_seen": 541875696, + "step": 4290 + }, + { + "epoch": 1.1004938113255949, + "loss": 1.0823887586593628, + "loss_ce": 0.0008457691874355078, + "loss_iou": 0.4921875, + "loss_num": 0.0194091796875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 541875696, + "step": 4290 + }, + { + "epoch": 1.1007503366895401, + "grad_norm": 19.23958396911621, + "learning_rate": 5e-06, + "loss": 0.8282, + "num_input_tokens_seen": 542001120, + "step": 4291 + }, + { + "epoch": 1.1007503366895401, + "loss": 0.8432102799415588, + "loss_ce": 0.0009251004667021334, + "loss_iou": 0.390625, + "loss_num": 0.01251220703125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 542001120, + "step": 4291 + }, + { + "epoch": 1.1010068620534856, + "grad_norm": 23.499225616455078, + "learning_rate": 5e-06, + "loss": 0.7738, + "num_input_tokens_seen": 542128356, + "step": 4292 + }, + { + "epoch": 1.1010068620534856, + "loss": 0.707275390625, + "loss_ce": 0.0012206961400806904, + "loss_iou": 0.33203125, + "loss_num": 0.00848388671875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 542128356, + "step": 4292 + }, + { + "epoch": 1.101263387417431, + "grad_norm": 27.7711181640625, + "learning_rate": 5e-06, + "loss": 0.885, + "num_input_tokens_seen": 542253812, + "step": 4293 + }, + { + "epoch": 1.101263387417431, + "loss": 0.9279822707176208, + "loss_ce": 0.0004920315695926547, + "loss_iou": 0.4375, + "loss_num": 0.01025390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 542253812, + "step": 4293 + }, + { + "epoch": 1.1015199127813762, + "grad_norm": 31.392070770263672, + "learning_rate": 5e-06, + "loss": 0.8842, + "num_input_tokens_seen": 542379648, + "step": 4294 + }, + { + "epoch": 1.1015199127813762, + "loss": 1.1123526096343994, + "loss_ce": 0.006151500158011913, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 542379648, + "step": 4294 + }, + { + "epoch": 1.1017764381453217, + "grad_norm": 28.39433479309082, + "learning_rate": 5e-06, + "loss": 0.8804, + "num_input_tokens_seen": 542505184, + "step": 4295 + }, + { + "epoch": 1.1017764381453217, + "loss": 0.9964362382888794, + "loss_ce": 0.00034247367875650525, + "loss_iou": 0.45703125, + "loss_num": 0.016357421875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 542505184, + "step": 4295 + }, + { + "epoch": 1.102032963509267, + "grad_norm": 23.99445343017578, + "learning_rate": 5e-06, + "loss": 0.7401, + "num_input_tokens_seen": 542631336, + "step": 4296 + }, + { + "epoch": 1.102032963509267, + "loss": 0.7546756267547607, + "loss_ce": 0.002478391397744417, + "loss_iou": 0.353515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 542631336, + "step": 4296 + }, + { + "epoch": 1.1022894888732124, + "grad_norm": 26.39970588684082, + "learning_rate": 5e-06, + "loss": 0.908, + "num_input_tokens_seen": 542758280, + "step": 4297 + }, + { + "epoch": 1.1022894888732124, + "loss": 0.8445360064506531, + "loss_ce": 0.005668806377798319, + "loss_iou": 0.369140625, + "loss_num": 0.019775390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 542758280, + "step": 4297 + }, + { + "epoch": 1.1025460142371577, + "grad_norm": 18.303077697753906, + "learning_rate": 5e-06, + "loss": 0.9792, + "num_input_tokens_seen": 542883528, + "step": 4298 + }, + { + "epoch": 1.1025460142371577, + "loss": 1.1276503801345825, + "loss_ce": 0.00020899563969578594, + "loss_iou": 0.51953125, + "loss_num": 0.017822265625, + "loss_xval": 1.125, + "num_input_tokens_seen": 542883528, + "step": 4298 + }, + { + "epoch": 1.102802539601103, + "grad_norm": 23.197484970092773, + "learning_rate": 5e-06, + "loss": 0.774, + "num_input_tokens_seen": 543010680, + "step": 4299 + }, + { + "epoch": 1.102802539601103, + "loss": 0.900610625743866, + "loss_ce": 0.0036379829980432987, + "loss_iou": 0.4140625, + "loss_num": 0.0137939453125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 543010680, + "step": 4299 + }, + { + "epoch": 1.1030590649650485, + "grad_norm": 25.959684371948242, + "learning_rate": 5e-06, + "loss": 0.9188, + "num_input_tokens_seen": 543137940, + "step": 4300 + }, + { + "epoch": 1.1030590649650485, + "loss": 1.1069316864013672, + "loss_ce": 0.0024394006468355656, + "loss_iou": 0.5, + "loss_num": 0.020263671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 543137940, + "step": 4300 + }, + { + "epoch": 1.1033155903289937, + "grad_norm": 28.61568260192871, + "learning_rate": 5e-06, + "loss": 0.7797, + "num_input_tokens_seen": 543263064, + "step": 4301 + }, + { + "epoch": 1.1033155903289937, + "loss": 0.7574151754379272, + "loss_ce": 0.0015558232553303242, + "loss_iou": 0.361328125, + "loss_num": 0.006561279296875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 543263064, + "step": 4301 + }, + { + "epoch": 1.1035721156929392, + "grad_norm": 36.354576110839844, + "learning_rate": 5e-06, + "loss": 0.9081, + "num_input_tokens_seen": 543390520, + "step": 4302 + }, + { + "epoch": 1.1035721156929392, + "loss": 0.7381107807159424, + "loss_ce": 0.0008061382686719298, + "loss_iou": 0.353515625, + "loss_num": 0.005950927734375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 543390520, + "step": 4302 + }, + { + "epoch": 1.1038286410568845, + "grad_norm": 45.602210998535156, + "learning_rate": 5e-06, + "loss": 0.9825, + "num_input_tokens_seen": 543516312, + "step": 4303 + }, + { + "epoch": 1.1038286410568845, + "loss": 0.8675527572631836, + "loss_ce": 0.0006093965494073927, + "loss_iou": 0.404296875, + "loss_num": 0.01202392578125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 543516312, + "step": 4303 + }, + { + "epoch": 1.1040851664208298, + "grad_norm": 91.75167083740234, + "learning_rate": 5e-06, + "loss": 0.8958, + "num_input_tokens_seen": 543642356, + "step": 4304 + }, + { + "epoch": 1.1040851664208298, + "loss": 1.0318009853363037, + "loss_ce": 0.0010393330594524741, + "loss_iou": 0.458984375, + "loss_num": 0.02294921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 543642356, + "step": 4304 + }, + { + "epoch": 1.1043416917847753, + "grad_norm": 50.707435607910156, + "learning_rate": 5e-06, + "loss": 0.8799, + "num_input_tokens_seen": 543770108, + "step": 4305 + }, + { + "epoch": 1.1043416917847753, + "loss": 0.827349841594696, + "loss_ce": 0.0009337865631096065, + "loss_iou": 0.384765625, + "loss_num": 0.01165771484375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 543770108, + "step": 4305 + }, + { + "epoch": 1.1045982171487205, + "grad_norm": 43.76702880859375, + "learning_rate": 5e-06, + "loss": 0.77, + "num_input_tokens_seen": 543895832, + "step": 4306 + }, + { + "epoch": 1.1045982171487205, + "loss": 0.727180540561676, + "loss_ce": 0.0020829015411436558, + "loss_iou": 0.345703125, + "loss_num": 0.006744384765625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 543895832, + "step": 4306 + }, + { + "epoch": 1.104854742512666, + "grad_norm": 37.87589645385742, + "learning_rate": 5e-06, + "loss": 0.846, + "num_input_tokens_seen": 544022276, + "step": 4307 + }, + { + "epoch": 1.104854742512666, + "loss": 0.8235594630241394, + "loss_ce": 0.000317238038405776, + "loss_iou": 0.3828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 544022276, + "step": 4307 + }, + { + "epoch": 1.1051112678766113, + "grad_norm": 43.56135177612305, + "learning_rate": 5e-06, + "loss": 0.9064, + "num_input_tokens_seen": 544148976, + "step": 4308 + }, + { + "epoch": 1.1051112678766113, + "loss": 0.8506561517715454, + "loss_ce": 0.0003143421490676701, + "loss_iou": 0.396484375, + "loss_num": 0.01141357421875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 544148976, + "step": 4308 + }, + { + "epoch": 1.1053677932405566, + "grad_norm": 43.42847442626953, + "learning_rate": 5e-06, + "loss": 0.8481, + "num_input_tokens_seen": 544274960, + "step": 4309 + }, + { + "epoch": 1.1053677932405566, + "loss": 0.7657710909843445, + "loss_ce": 0.0006343668792396784, + "loss_iou": 0.369140625, + "loss_num": 0.005401611328125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 544274960, + "step": 4309 + }, + { + "epoch": 1.105624318604502, + "grad_norm": 35.625308990478516, + "learning_rate": 5e-06, + "loss": 0.8821, + "num_input_tokens_seen": 544400900, + "step": 4310 + }, + { + "epoch": 1.105624318604502, + "loss": 1.0252317190170288, + "loss_ce": 0.0005735588492825627, + "loss_iou": 0.46875, + "loss_num": 0.0172119140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 544400900, + "step": 4310 + }, + { + "epoch": 1.1058808439684473, + "grad_norm": 48.231136322021484, + "learning_rate": 5e-06, + "loss": 0.8841, + "num_input_tokens_seen": 544526980, + "step": 4311 + }, + { + "epoch": 1.1058808439684473, + "loss": 0.9105788469314575, + "loss_ce": 0.002619875594973564, + "loss_iou": 0.40234375, + "loss_num": 0.0205078125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 544526980, + "step": 4311 + }, + { + "epoch": 1.1061373693323928, + "grad_norm": 55.390167236328125, + "learning_rate": 5e-06, + "loss": 0.8102, + "num_input_tokens_seen": 544653632, + "step": 4312 + }, + { + "epoch": 1.1061373693323928, + "loss": 0.788372278213501, + "loss_ce": 0.003704322502017021, + "loss_iou": 0.36328125, + "loss_num": 0.01165771484375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 544653632, + "step": 4312 + }, + { + "epoch": 1.106393894696338, + "grad_norm": 35.091670989990234, + "learning_rate": 5e-06, + "loss": 1.0184, + "num_input_tokens_seen": 544779380, + "step": 4313 + }, + { + "epoch": 1.106393894696338, + "loss": 0.9373239278793335, + "loss_ce": 0.0010446407832205296, + "loss_iou": 0.431640625, + "loss_num": 0.01470947265625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 544779380, + "step": 4313 + }, + { + "epoch": 1.1066504200602834, + "grad_norm": 30.846092224121094, + "learning_rate": 5e-06, + "loss": 0.8725, + "num_input_tokens_seen": 544905788, + "step": 4314 + }, + { + "epoch": 1.1066504200602834, + "loss": 0.9354002475738525, + "loss_ce": 0.00034161435905843973, + "loss_iou": 0.44140625, + "loss_num": 0.01007080078125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 544905788, + "step": 4314 + }, + { + "epoch": 1.1069069454242288, + "grad_norm": 47.69921875, + "learning_rate": 5e-06, + "loss": 0.8348, + "num_input_tokens_seen": 545032256, + "step": 4315 + }, + { + "epoch": 1.1069069454242288, + "loss": 0.8526338338851929, + "loss_ce": 0.001803714083507657, + "loss_iou": 0.390625, + "loss_num": 0.01416015625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 545032256, + "step": 4315 + }, + { + "epoch": 1.1071634707881741, + "grad_norm": 84.14291381835938, + "learning_rate": 5e-06, + "loss": 0.9106, + "num_input_tokens_seen": 545159704, + "step": 4316 + }, + { + "epoch": 1.1071634707881741, + "loss": 0.8474912643432617, + "loss_ce": 0.001055678236298263, + "loss_iou": 0.396484375, + "loss_num": 0.01092529296875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 545159704, + "step": 4316 + }, + { + "epoch": 1.1074199961521196, + "grad_norm": 45.74492645263672, + "learning_rate": 5e-06, + "loss": 0.8932, + "num_input_tokens_seen": 545285364, + "step": 4317 + }, + { + "epoch": 1.1074199961521196, + "loss": 0.8470907211303711, + "loss_ce": 0.0008993504452519119, + "loss_iou": 0.3984375, + "loss_num": 0.0098876953125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 545285364, + "step": 4317 + }, + { + "epoch": 1.1076765215160649, + "grad_norm": 41.20510482788086, + "learning_rate": 5e-06, + "loss": 0.8304, + "num_input_tokens_seen": 545412204, + "step": 4318 + }, + { + "epoch": 1.1076765215160649, + "loss": 1.028680682182312, + "loss_ce": 0.0013370290398597717, + "loss_iou": 0.4765625, + "loss_num": 0.01544189453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 545412204, + "step": 4318 + }, + { + "epoch": 1.1079330468800102, + "grad_norm": 49.92390060424805, + "learning_rate": 5e-06, + "loss": 0.8388, + "num_input_tokens_seen": 545538508, + "step": 4319 + }, + { + "epoch": 1.1079330468800102, + "loss": 0.8438367247581482, + "loss_ce": 8.673957199789584e-05, + "loss_iou": 0.404296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 545538508, + "step": 4319 + }, + { + "epoch": 1.1081895722439556, + "grad_norm": 41.34299850463867, + "learning_rate": 5e-06, + "loss": 0.9655, + "num_input_tokens_seen": 545665048, + "step": 4320 + }, + { + "epoch": 1.1081895722439556, + "loss": 0.9609396457672119, + "loss_ce": 0.0021993510890752077, + "loss_iou": 0.43359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 545665048, + "step": 4320 + }, + { + "epoch": 1.108446097607901, + "grad_norm": 42.18601989746094, + "learning_rate": 5e-06, + "loss": 0.7357, + "num_input_tokens_seen": 545790824, + "step": 4321 + }, + { + "epoch": 1.108446097607901, + "loss": 0.7437649965286255, + "loss_ce": 0.0001126947026932612, + "loss_iou": 0.3515625, + "loss_num": 0.00787353515625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 545790824, + "step": 4321 + }, + { + "epoch": 1.1087026229718464, + "grad_norm": 55.76960372924805, + "learning_rate": 5e-06, + "loss": 0.9227, + "num_input_tokens_seen": 545915996, + "step": 4322 + }, + { + "epoch": 1.1087026229718464, + "loss": 0.8311585187911987, + "loss_ce": 0.00010382455366197973, + "loss_iou": 0.39453125, + "loss_num": 0.0087890625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 545915996, + "step": 4322 + }, + { + "epoch": 1.1089591483357917, + "grad_norm": 47.57638168334961, + "learning_rate": 5e-06, + "loss": 0.9503, + "num_input_tokens_seen": 546042392, + "step": 4323 + }, + { + "epoch": 1.1089591483357917, + "loss": 1.0606603622436523, + "loss_ce": 0.0006017279229126871, + "loss_iou": 0.484375, + "loss_num": 0.0185546875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 546042392, + "step": 4323 + }, + { + "epoch": 1.1092156736997372, + "grad_norm": 23.349313735961914, + "learning_rate": 5e-06, + "loss": 0.7945, + "num_input_tokens_seen": 546168972, + "step": 4324 + }, + { + "epoch": 1.1092156736997372, + "loss": 0.9148021936416626, + "loss_ce": 0.0007397367735393345, + "loss_iou": 0.427734375, + "loss_num": 0.01177978515625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 546168972, + "step": 4324 + }, + { + "epoch": 1.1094721990636824, + "grad_norm": 30.05824089050293, + "learning_rate": 5e-06, + "loss": 0.91, + "num_input_tokens_seen": 546294420, + "step": 4325 + }, + { + "epoch": 1.1094721990636824, + "loss": 1.0125209093093872, + "loss_ce": 0.0008021629182621837, + "loss_iou": 0.48828125, + "loss_num": 0.007476806640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 546294420, + "step": 4325 + }, + { + "epoch": 1.1097287244276277, + "grad_norm": 38.878639221191406, + "learning_rate": 5e-06, + "loss": 0.838, + "num_input_tokens_seen": 546420720, + "step": 4326 + }, + { + "epoch": 1.1097287244276277, + "loss": 0.8379665613174438, + "loss_ce": 7.59649119572714e-05, + "loss_iou": 0.396484375, + "loss_num": 0.009033203125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 546420720, + "step": 4326 + }, + { + "epoch": 1.1099852497915732, + "grad_norm": 36.23270034790039, + "learning_rate": 5e-06, + "loss": 0.9309, + "num_input_tokens_seen": 546545608, + "step": 4327 + }, + { + "epoch": 1.1099852497915732, + "loss": 1.082977056503296, + "loss_ce": 0.0010678669204935431, + "loss_iou": 0.50390625, + "loss_num": 0.0150146484375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 546545608, + "step": 4327 + }, + { + "epoch": 1.1102417751555185, + "grad_norm": 30.02294921875, + "learning_rate": 5e-06, + "loss": 0.9254, + "num_input_tokens_seen": 546671504, + "step": 4328 + }, + { + "epoch": 1.1102417751555185, + "loss": 1.0396860837936401, + "loss_ce": 0.00013527771807275712, + "loss_iou": 0.48828125, + "loss_num": 0.0125732421875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 546671504, + "step": 4328 + }, + { + "epoch": 1.110498300519464, + "grad_norm": 38.41831970214844, + "learning_rate": 5e-06, + "loss": 0.7806, + "num_input_tokens_seen": 546798088, + "step": 4329 + }, + { + "epoch": 1.110498300519464, + "loss": 0.7459325790405273, + "loss_ce": 0.0005712311249226332, + "loss_iou": 0.34375, + "loss_num": 0.011962890625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 546798088, + "step": 4329 + }, + { + "epoch": 1.1107548258834092, + "grad_norm": 64.88684844970703, + "learning_rate": 5e-06, + "loss": 0.7683, + "num_input_tokens_seen": 546925776, + "step": 4330 + }, + { + "epoch": 1.1107548258834092, + "loss": 0.7801439762115479, + "loss_ce": 0.0010911995777860284, + "loss_iou": 0.365234375, + "loss_num": 0.0096435546875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 546925776, + "step": 4330 + }, + { + "epoch": 1.1110113512473545, + "grad_norm": 48.632659912109375, + "learning_rate": 5e-06, + "loss": 1.0049, + "num_input_tokens_seen": 547053120, + "step": 4331 + }, + { + "epoch": 1.1110113512473545, + "loss": 0.9485212564468384, + "loss_ce": 0.00027907907497137785, + "loss_iou": 0.44921875, + "loss_num": 0.010009765625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 547053120, + "step": 4331 + }, + { + "epoch": 1.1112678766113, + "grad_norm": 36.08280563354492, + "learning_rate": 5e-06, + "loss": 0.8086, + "num_input_tokens_seen": 547179528, + "step": 4332 + }, + { + "epoch": 1.1112678766113, + "loss": 0.7682315707206726, + "loss_ce": 0.0008975934470072389, + "loss_iou": 0.369140625, + "loss_num": 0.005523681640625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 547179528, + "step": 4332 + }, + { + "epoch": 1.1115244019752453, + "grad_norm": 23.5977840423584, + "learning_rate": 5e-06, + "loss": 0.8733, + "num_input_tokens_seen": 547305424, + "step": 4333 + }, + { + "epoch": 1.1115244019752453, + "loss": 0.7772883176803589, + "loss_ce": 0.00043281036778353155, + "loss_iou": 0.365234375, + "loss_num": 0.009033203125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 547305424, + "step": 4333 + }, + { + "epoch": 1.1117809273391908, + "grad_norm": 22.18503761291504, + "learning_rate": 5e-06, + "loss": 0.8892, + "num_input_tokens_seen": 547431468, + "step": 4334 + }, + { + "epoch": 1.1117809273391908, + "loss": 0.8320704698562622, + "loss_ce": 0.0005274790455587208, + "loss_iou": 0.375, + "loss_num": 0.0162353515625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 547431468, + "step": 4334 + }, + { + "epoch": 1.112037452703136, + "grad_norm": 29.73309326171875, + "learning_rate": 5e-06, + "loss": 0.8016, + "num_input_tokens_seen": 547558356, + "step": 4335 + }, + { + "epoch": 1.112037452703136, + "loss": 0.8512779474258423, + "loss_ce": 0.0006919445586390793, + "loss_iou": 0.40234375, + "loss_num": 0.00885009765625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 547558356, + "step": 4335 + }, + { + "epoch": 1.1122939780670813, + "grad_norm": 39.437191009521484, + "learning_rate": 5e-06, + "loss": 0.9088, + "num_input_tokens_seen": 547683576, + "step": 4336 + }, + { + "epoch": 1.1122939780670813, + "loss": 1.0593878030776978, + "loss_ce": 0.0007940260693430901, + "loss_iou": 0.48046875, + "loss_num": 0.0198974609375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 547683576, + "step": 4336 + }, + { + "epoch": 1.1125505034310268, + "grad_norm": 43.902976989746094, + "learning_rate": 5e-06, + "loss": 0.8705, + "num_input_tokens_seen": 547809848, + "step": 4337 + }, + { + "epoch": 1.1125505034310268, + "loss": 0.778790295124054, + "loss_ce": 0.002178991213440895, + "loss_iou": 0.359375, + "loss_num": 0.01190185546875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 547809848, + "step": 4337 + }, + { + "epoch": 1.112807028794972, + "grad_norm": 63.915199279785156, + "learning_rate": 5e-06, + "loss": 0.9543, + "num_input_tokens_seen": 547936576, + "step": 4338 + }, + { + "epoch": 1.112807028794972, + "loss": 1.026183009147644, + "loss_ce": 0.0007924425881356001, + "loss_iou": 0.46484375, + "loss_num": 0.0191650390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 547936576, + "step": 4338 + }, + { + "epoch": 1.1130635541589176, + "grad_norm": 41.193145751953125, + "learning_rate": 5e-06, + "loss": 0.922, + "num_input_tokens_seen": 548061560, + "step": 4339 + }, + { + "epoch": 1.1130635541589176, + "loss": 0.8346478939056396, + "loss_ce": 0.0006635297322645783, + "loss_iou": 0.392578125, + "loss_num": 0.009765625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 548061560, + "step": 4339 + }, + { + "epoch": 1.1133200795228628, + "grad_norm": 15.18467903137207, + "learning_rate": 5e-06, + "loss": 0.824, + "num_input_tokens_seen": 548187144, + "step": 4340 + }, + { + "epoch": 1.1133200795228628, + "loss": 0.7874307036399841, + "loss_ce": 0.000809607794508338, + "loss_iou": 0.35546875, + "loss_num": 0.01507568359375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 548187144, + "step": 4340 + }, + { + "epoch": 1.113576604886808, + "grad_norm": 16.0417423248291, + "learning_rate": 5e-06, + "loss": 0.8983, + "num_input_tokens_seen": 548312424, + "step": 4341 + }, + { + "epoch": 1.113576604886808, + "loss": 1.070455551147461, + "loss_ce": 0.002096222247928381, + "loss_iou": 0.48828125, + "loss_num": 0.01806640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 548312424, + "step": 4341 + }, + { + "epoch": 1.1138331302507536, + "grad_norm": 29.132429122924805, + "learning_rate": 5e-06, + "loss": 0.8498, + "num_input_tokens_seen": 548439744, + "step": 4342 + }, + { + "epoch": 1.1138331302507536, + "loss": 1.0245627164840698, + "loss_ce": 0.0035666569601744413, + "loss_iou": 0.4453125, + "loss_num": 0.0262451171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 548439744, + "step": 4342 + }, + { + "epoch": 1.1140896556146989, + "grad_norm": 57.43910217285156, + "learning_rate": 5e-06, + "loss": 0.8075, + "num_input_tokens_seen": 548565996, + "step": 4343 + }, + { + "epoch": 1.1140896556146989, + "loss": 0.7767149806022644, + "loss_ce": 0.00034781708382070065, + "loss_iou": 0.359375, + "loss_num": 0.011474609375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 548565996, + "step": 4343 + }, + { + "epoch": 1.1143461809786444, + "grad_norm": 46.432334899902344, + "learning_rate": 5e-06, + "loss": 0.9115, + "num_input_tokens_seen": 548691336, + "step": 4344 + }, + { + "epoch": 1.1143461809786444, + "loss": 0.9542741775512695, + "loss_ce": 0.00017260274034924805, + "loss_iou": 0.447265625, + "loss_num": 0.0123291015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 548691336, + "step": 4344 + }, + { + "epoch": 1.1146027063425896, + "grad_norm": 45.6448974609375, + "learning_rate": 5e-06, + "loss": 0.8134, + "num_input_tokens_seen": 548818332, + "step": 4345 + }, + { + "epoch": 1.1146027063425896, + "loss": 0.8319808840751648, + "loss_ce": 0.001902785967104137, + "loss_iou": 0.376953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 548818332, + "step": 4345 + }, + { + "epoch": 1.114859231706535, + "grad_norm": 44.381229400634766, + "learning_rate": 5e-06, + "loss": 0.8669, + "num_input_tokens_seen": 548945232, + "step": 4346 + }, + { + "epoch": 1.114859231706535, + "loss": 0.8497058153152466, + "loss_ce": 0.0005846631247550249, + "loss_iou": 0.39453125, + "loss_num": 0.01171875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 548945232, + "step": 4346 + }, + { + "epoch": 1.1151157570704804, + "grad_norm": 50.7393798828125, + "learning_rate": 5e-06, + "loss": 0.9095, + "num_input_tokens_seen": 549071388, + "step": 4347 + }, + { + "epoch": 1.1151157570704804, + "loss": 0.875673770904541, + "loss_ce": 0.0006737565854564309, + "loss_iou": 0.416015625, + "loss_num": 0.00836181640625, + "loss_xval": 0.875, + "num_input_tokens_seen": 549071388, + "step": 4347 + }, + { + "epoch": 1.1153722824344257, + "grad_norm": 42.75398635864258, + "learning_rate": 5e-06, + "loss": 0.8838, + "num_input_tokens_seen": 549199352, + "step": 4348 + }, + { + "epoch": 1.1153722824344257, + "loss": 0.9345332384109497, + "loss_ce": 0.0011835902696475387, + "loss_iou": 0.4140625, + "loss_num": 0.0208740234375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 549199352, + "step": 4348 + }, + { + "epoch": 1.1156288077983711, + "grad_norm": 42.85823440551758, + "learning_rate": 5e-06, + "loss": 0.7838, + "num_input_tokens_seen": 549325656, + "step": 4349 + }, + { + "epoch": 1.1156288077983711, + "loss": 0.7038822174072266, + "loss_ce": 0.0014896478969603777, + "loss_iou": 0.33984375, + "loss_num": 0.0047607421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 549325656, + "step": 4349 + }, + { + "epoch": 1.1158853331623164, + "grad_norm": 49.983821868896484, + "learning_rate": 5e-06, + "loss": 0.7953, + "num_input_tokens_seen": 549452284, + "step": 4350 + }, + { + "epoch": 1.1158853331623164, + "loss": 0.7098771929740906, + "loss_ce": 0.00016040733316913247, + "loss_iou": 0.341796875, + "loss_num": 0.005615234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 549452284, + "step": 4350 + }, + { + "epoch": 1.1161418585262617, + "grad_norm": 52.36488723754883, + "learning_rate": 5e-06, + "loss": 0.8923, + "num_input_tokens_seen": 549579272, + "step": 4351 + }, + { + "epoch": 1.1161418585262617, + "loss": 0.99235999584198, + "loss_ce": 0.0001725416659610346, + "loss_iou": 0.44921875, + "loss_num": 0.0185546875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 549579272, + "step": 4351 + }, + { + "epoch": 1.1163983838902072, + "grad_norm": 42.4118766784668, + "learning_rate": 5e-06, + "loss": 1.0615, + "num_input_tokens_seen": 549706340, + "step": 4352 + }, + { + "epoch": 1.1163983838902072, + "loss": 1.2037432193756104, + "loss_ce": 0.002571369521319866, + "loss_iou": 0.52734375, + "loss_num": 0.0294189453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 549706340, + "step": 4352 + }, + { + "epoch": 1.1166549092541525, + "grad_norm": 40.351776123046875, + "learning_rate": 5e-06, + "loss": 0.8098, + "num_input_tokens_seen": 549833780, + "step": 4353 + }, + { + "epoch": 1.1166549092541525, + "loss": 0.8293738961219788, + "loss_ce": 0.001248902059160173, + "loss_iou": 0.3828125, + "loss_num": 0.0123291015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 549833780, + "step": 4353 + }, + { + "epoch": 1.116911434618098, + "grad_norm": 48.80207824707031, + "learning_rate": 5e-06, + "loss": 0.9824, + "num_input_tokens_seen": 549961556, + "step": 4354 + }, + { + "epoch": 1.116911434618098, + "loss": 1.1541458368301392, + "loss_ce": 0.001802100334316492, + "loss_iou": 0.53125, + "loss_num": 0.0181884765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 549961556, + "step": 4354 + }, + { + "epoch": 1.1171679599820432, + "grad_norm": 46.95970153808594, + "learning_rate": 5e-06, + "loss": 0.7989, + "num_input_tokens_seen": 550088136, + "step": 4355 + }, + { + "epoch": 1.1171679599820432, + "loss": 0.9718947410583496, + "loss_ce": 0.003144698217511177, + "loss_iou": 0.421875, + "loss_num": 0.0252685546875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 550088136, + "step": 4355 + }, + { + "epoch": 1.1174244853459885, + "grad_norm": 40.880516052246094, + "learning_rate": 5e-06, + "loss": 0.9483, + "num_input_tokens_seen": 550214248, + "step": 4356 + }, + { + "epoch": 1.1174244853459885, + "loss": 0.7997308969497681, + "loss_ce": 0.001146951923146844, + "loss_iou": 0.359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 550214248, + "step": 4356 + }, + { + "epoch": 1.117681010709934, + "grad_norm": 44.80527877807617, + "learning_rate": 5e-06, + "loss": 0.9267, + "num_input_tokens_seen": 550341020, + "step": 4357 + }, + { + "epoch": 1.117681010709934, + "loss": 0.9524534344673157, + "loss_ce": 0.0012815501540899277, + "loss_iou": 0.431640625, + "loss_num": 0.017578125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 550341020, + "step": 4357 + }, + { + "epoch": 1.1179375360738792, + "grad_norm": 44.82581329345703, + "learning_rate": 5e-06, + "loss": 0.8198, + "num_input_tokens_seen": 550467328, + "step": 4358 + }, + { + "epoch": 1.1179375360738792, + "loss": 0.8152981996536255, + "loss_ce": 0.0013334134127944708, + "loss_iou": 0.384765625, + "loss_num": 0.00927734375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 550467328, + "step": 4358 + }, + { + "epoch": 1.1181940614378247, + "grad_norm": 45.86843490600586, + "learning_rate": 5e-06, + "loss": 0.8333, + "num_input_tokens_seen": 550592892, + "step": 4359 + }, + { + "epoch": 1.1181940614378247, + "loss": 0.8417699337005615, + "loss_ce": 0.002170381834730506, + "loss_iou": 0.40234375, + "loss_num": 0.006805419921875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 550592892, + "step": 4359 + }, + { + "epoch": 1.11845058680177, + "grad_norm": 31.5723819732666, + "learning_rate": 5e-06, + "loss": 0.9584, + "num_input_tokens_seen": 550718484, + "step": 4360 + }, + { + "epoch": 1.11845058680177, + "loss": 0.8775990009307861, + "loss_ce": 0.0004017712199129164, + "loss_iou": 0.41015625, + "loss_num": 0.0111083984375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 550718484, + "step": 4360 + }, + { + "epoch": 1.1187071121657155, + "grad_norm": 25.42947769165039, + "learning_rate": 5e-06, + "loss": 0.8516, + "num_input_tokens_seen": 550844716, + "step": 4361 + }, + { + "epoch": 1.1187071121657155, + "loss": 0.9348640441894531, + "loss_ce": 0.001026150188408792, + "loss_iou": 0.421875, + "loss_num": 0.018310546875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 550844716, + "step": 4361 + }, + { + "epoch": 1.1189636375296608, + "grad_norm": 37.81428909301758, + "learning_rate": 5e-06, + "loss": 0.8755, + "num_input_tokens_seen": 550971596, + "step": 4362 + }, + { + "epoch": 1.1189636375296608, + "loss": 0.9227215051651001, + "loss_ce": 0.0013348252978175879, + "loss_iou": 0.42578125, + "loss_num": 0.013916015625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 550971596, + "step": 4362 + }, + { + "epoch": 1.119220162893606, + "grad_norm": 50.570152282714844, + "learning_rate": 5e-06, + "loss": 0.8338, + "num_input_tokens_seen": 551096720, + "step": 4363 + }, + { + "epoch": 1.119220162893606, + "loss": 0.8676013946533203, + "loss_ce": 0.0023670385126024485, + "loss_iou": 0.39453125, + "loss_num": 0.0150146484375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 551096720, + "step": 4363 + }, + { + "epoch": 1.1194766882575515, + "grad_norm": 48.67875289916992, + "learning_rate": 5e-06, + "loss": 0.8746, + "num_input_tokens_seen": 551222668, + "step": 4364 + }, + { + "epoch": 1.1194766882575515, + "loss": 0.7082058191299438, + "loss_ce": 0.0016628922894597054, + "loss_iou": 0.326171875, + "loss_num": 0.0108642578125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 551222668, + "step": 4364 + }, + { + "epoch": 1.1197332136214968, + "grad_norm": 55.56336212158203, + "learning_rate": 5e-06, + "loss": 0.9132, + "num_input_tokens_seen": 551349372, + "step": 4365 + }, + { + "epoch": 1.1197332136214968, + "loss": 0.9336636662483215, + "loss_ce": 0.0010464995866641402, + "loss_iou": 0.42578125, + "loss_num": 0.01611328125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 551349372, + "step": 4365 + }, + { + "epoch": 1.119989738985442, + "grad_norm": 57.901336669921875, + "learning_rate": 5e-06, + "loss": 0.9009, + "num_input_tokens_seen": 551476308, + "step": 4366 + }, + { + "epoch": 1.119989738985442, + "loss": 1.0617588758468628, + "loss_ce": 0.001700288848951459, + "loss_iou": 0.50390625, + "loss_num": 0.0107421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 551476308, + "step": 4366 + }, + { + "epoch": 1.1202462643493876, + "grad_norm": 53.584716796875, + "learning_rate": 5e-06, + "loss": 0.9056, + "num_input_tokens_seen": 551602616, + "step": 4367 + }, + { + "epoch": 1.1202462643493876, + "loss": 0.891726553440094, + "loss_ce": 0.00012495940609369427, + "loss_iou": 0.423828125, + "loss_num": 0.0087890625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 551602616, + "step": 4367 + }, + { + "epoch": 1.1205027897133328, + "grad_norm": 44.50483703613281, + "learning_rate": 5e-06, + "loss": 0.92, + "num_input_tokens_seen": 551728364, + "step": 4368 + }, + { + "epoch": 1.1205027897133328, + "loss": 0.9803924560546875, + "loss_ce": 0.00041199952829629183, + "loss_iou": 0.45703125, + "loss_num": 0.01336669921875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 551728364, + "step": 4368 + }, + { + "epoch": 1.1207593150772783, + "grad_norm": 37.173744201660156, + "learning_rate": 5e-06, + "loss": 0.8246, + "num_input_tokens_seen": 551854700, + "step": 4369 + }, + { + "epoch": 1.1207593150772783, + "loss": 0.9545555114746094, + "loss_ce": 0.0004539501969702542, + "loss_iou": 0.439453125, + "loss_num": 0.0155029296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 551854700, + "step": 4369 + }, + { + "epoch": 1.1210158404412236, + "grad_norm": 44.56998825073242, + "learning_rate": 5e-06, + "loss": 0.9118, + "num_input_tokens_seen": 551980588, + "step": 4370 + }, + { + "epoch": 1.1210158404412236, + "loss": 0.8184912800788879, + "loss_ce": 0.00013190042227506638, + "loss_iou": 0.38671875, + "loss_num": 0.0091552734375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 551980588, + "step": 4370 + }, + { + "epoch": 1.121272365805169, + "grad_norm": 38.713741302490234, + "learning_rate": 5e-06, + "loss": 0.9, + "num_input_tokens_seen": 552106076, + "step": 4371 + }, + { + "epoch": 1.121272365805169, + "loss": 0.8636499047279358, + "loss_ce": 0.0001245239982381463, + "loss_iou": 0.400390625, + "loss_num": 0.01239013671875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 552106076, + "step": 4371 + }, + { + "epoch": 1.1215288911691144, + "grad_norm": 30.864137649536133, + "learning_rate": 5e-06, + "loss": 0.9205, + "num_input_tokens_seen": 552231528, + "step": 4372 + }, + { + "epoch": 1.1215288911691144, + "loss": 0.9976637959480286, + "loss_ce": 0.001081775058992207, + "loss_iou": 0.455078125, + "loss_num": 0.01708984375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 552231528, + "step": 4372 + }, + { + "epoch": 1.1217854165330596, + "grad_norm": 29.91069984436035, + "learning_rate": 5e-06, + "loss": 0.913, + "num_input_tokens_seen": 552357680, + "step": 4373 + }, + { + "epoch": 1.1217854165330596, + "loss": 0.9998218417167664, + "loss_ce": 0.0007983618415892124, + "loss_iou": 0.45703125, + "loss_num": 0.016845703125, + "loss_xval": 1.0, + "num_input_tokens_seen": 552357680, + "step": 4373 + }, + { + "epoch": 1.1220419418970051, + "grad_norm": 34.38151550292969, + "learning_rate": 5e-06, + "loss": 0.8808, + "num_input_tokens_seen": 552483836, + "step": 4374 + }, + { + "epoch": 1.1220419418970051, + "loss": 0.7624691724777222, + "loss_ce": 0.0012386748567223549, + "loss_iou": 0.361328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 552483836, + "step": 4374 + }, + { + "epoch": 1.1222984672609504, + "grad_norm": 41.12311935424805, + "learning_rate": 5e-06, + "loss": 0.8607, + "num_input_tokens_seen": 552609852, + "step": 4375 + }, + { + "epoch": 1.1222984672609504, + "loss": 0.8001327514648438, + "loss_ce": 0.0003280199889559299, + "loss_iou": 0.376953125, + "loss_num": 0.009521484375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 552609852, + "step": 4375 + }, + { + "epoch": 1.1225549926248959, + "grad_norm": 49.13603210449219, + "learning_rate": 5e-06, + "loss": 0.8807, + "num_input_tokens_seen": 552735908, + "step": 4376 + }, + { + "epoch": 1.1225549926248959, + "loss": 0.9168971180915833, + "loss_ce": 0.0018580653704702854, + "loss_iou": 0.42578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 552735908, + "step": 4376 + }, + { + "epoch": 1.1228115179888412, + "grad_norm": 64.6429443359375, + "learning_rate": 5e-06, + "loss": 0.8666, + "num_input_tokens_seen": 552862552, + "step": 4377 + }, + { + "epoch": 1.1228115179888412, + "loss": 1.022615909576416, + "loss_ce": 0.002596435835584998, + "loss_iou": 0.46484375, + "loss_num": 0.018310546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 552862552, + "step": 4377 + }, + { + "epoch": 1.1230680433527864, + "grad_norm": 39.24332046508789, + "learning_rate": 5e-06, + "loss": 0.8474, + "num_input_tokens_seen": 552987532, + "step": 4378 + }, + { + "epoch": 1.1230680433527864, + "loss": 0.8478530645370483, + "loss_ce": 0.0009292476461268961, + "loss_iou": 0.3984375, + "loss_num": 0.01007080078125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 552987532, + "step": 4378 + }, + { + "epoch": 1.123324568716732, + "grad_norm": 19.18568992614746, + "learning_rate": 5e-06, + "loss": 0.7981, + "num_input_tokens_seen": 553113336, + "step": 4379 + }, + { + "epoch": 1.123324568716732, + "loss": 0.6219620108604431, + "loss_ce": 0.00013583345571532845, + "loss_iou": 0.30078125, + "loss_num": 0.003692626953125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 553113336, + "step": 4379 + }, + { + "epoch": 1.1235810940806772, + "grad_norm": 20.040058135986328, + "learning_rate": 5e-06, + "loss": 0.8888, + "num_input_tokens_seen": 553240752, + "step": 4380 + }, + { + "epoch": 1.1235810940806772, + "loss": 0.734830379486084, + "loss_ce": 0.00021118266158737242, + "loss_iou": 0.31640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 553240752, + "step": 4380 + }, + { + "epoch": 1.1238376194446227, + "grad_norm": 32.27317810058594, + "learning_rate": 5e-06, + "loss": 0.7715, + "num_input_tokens_seen": 553366632, + "step": 4381 + }, + { + "epoch": 1.1238376194446227, + "loss": 0.9068415760993958, + "loss_ce": 0.002544699003919959, + "loss_iou": 0.40625, + "loss_num": 0.018310546875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 553366632, + "step": 4381 + }, + { + "epoch": 1.124094144808568, + "grad_norm": 30.0147647857666, + "learning_rate": 5e-06, + "loss": 0.8307, + "num_input_tokens_seen": 553492428, + "step": 4382 + }, + { + "epoch": 1.124094144808568, + "loss": 0.8747435808181763, + "loss_ce": 0.0012084178160876036, + "loss_iou": 0.40625, + "loss_num": 0.012451171875, + "loss_xval": 0.875, + "num_input_tokens_seen": 553492428, + "step": 4382 + }, + { + "epoch": 1.1243506701725132, + "grad_norm": 30.49001693725586, + "learning_rate": 5e-06, + "loss": 0.874, + "num_input_tokens_seen": 553618004, + "step": 4383 + }, + { + "epoch": 1.1243506701725132, + "loss": 0.7560674548149109, + "loss_ce": 0.00045218784362077713, + "loss_iou": 0.353515625, + "loss_num": 0.0098876953125, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 553618004, + "step": 4383 + }, + { + "epoch": 1.1246071955364587, + "grad_norm": 25.693103790283203, + "learning_rate": 5e-06, + "loss": 0.8422, + "num_input_tokens_seen": 553744816, + "step": 4384 + }, + { + "epoch": 1.1246071955364587, + "loss": 0.9814960360527039, + "loss_ce": 0.0002948415349237621, + "loss_iou": 0.4453125, + "loss_num": 0.01806640625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 553744816, + "step": 4384 + }, + { + "epoch": 1.124863720900404, + "grad_norm": 32.014095306396484, + "learning_rate": 5e-06, + "loss": 0.8295, + "num_input_tokens_seen": 553872164, + "step": 4385 + }, + { + "epoch": 1.124863720900404, + "loss": 0.6938352584838867, + "loss_ce": 0.0046262736432254314, + "loss_iou": 0.328125, + "loss_num": 0.006500244140625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 553872164, + "step": 4385 + }, + { + "epoch": 1.1251202462643495, + "grad_norm": 38.43335723876953, + "learning_rate": 5e-06, + "loss": 0.9303, + "num_input_tokens_seen": 553998316, + "step": 4386 + }, + { + "epoch": 1.1251202462643495, + "loss": 0.8111484050750732, + "loss_ce": 0.003531200345605612, + "loss_iou": 0.38671875, + "loss_num": 0.00634765625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 553998316, + "step": 4386 + }, + { + "epoch": 1.1253767716282947, + "grad_norm": 39.179443359375, + "learning_rate": 5e-06, + "loss": 0.7466, + "num_input_tokens_seen": 554123560, + "step": 4387 + }, + { + "epoch": 1.1253767716282947, + "loss": 0.8397973775863647, + "loss_ce": 0.0009301979443989694, + "loss_iou": 0.40234375, + "loss_num": 0.007080078125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 554123560, + "step": 4387 + }, + { + "epoch": 1.12563329699224, + "grad_norm": 41.7021369934082, + "learning_rate": 5e-06, + "loss": 0.8533, + "num_input_tokens_seen": 554249508, + "step": 4388 + }, + { + "epoch": 1.12563329699224, + "loss": 0.7980391979217529, + "loss_ce": 0.005070447456091642, + "loss_iou": 0.369140625, + "loss_num": 0.01092529296875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 554249508, + "step": 4388 + }, + { + "epoch": 1.1258898223561855, + "grad_norm": 36.926239013671875, + "learning_rate": 5e-06, + "loss": 0.8864, + "num_input_tokens_seen": 554376552, + "step": 4389 + }, + { + "epoch": 1.1258898223561855, + "loss": 0.6297857761383057, + "loss_ce": 0.002344384789466858, + "loss_iou": 0.287109375, + "loss_num": 0.01031494140625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 554376552, + "step": 4389 + }, + { + "epoch": 1.1261463477201308, + "grad_norm": 51.78299331665039, + "learning_rate": 5e-06, + "loss": 0.8808, + "num_input_tokens_seen": 554502756, + "step": 4390 + }, + { + "epoch": 1.1261463477201308, + "loss": 1.0998618602752686, + "loss_ce": 0.0012289967853575945, + "loss_iou": 0.494140625, + "loss_num": 0.02197265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 554502756, + "step": 4390 + }, + { + "epoch": 1.1264028730840763, + "grad_norm": 52.92314529418945, + "learning_rate": 5e-06, + "loss": 0.9373, + "num_input_tokens_seen": 554627876, + "step": 4391 + }, + { + "epoch": 1.1264028730840763, + "loss": 0.8387633562088013, + "loss_ce": 0.0018493086099624634, + "loss_iou": 0.37109375, + "loss_num": 0.0194091796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 554627876, + "step": 4391 + }, + { + "epoch": 1.1266593984480215, + "grad_norm": 69.74037170410156, + "learning_rate": 5e-06, + "loss": 0.9283, + "num_input_tokens_seen": 554754072, + "step": 4392 + }, + { + "epoch": 1.1266593984480215, + "loss": 0.7326191067695618, + "loss_ce": 0.002638638950884342, + "loss_iou": 0.353515625, + "loss_num": 0.0050048828125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 554754072, + "step": 4392 + }, + { + "epoch": 1.1269159238119668, + "grad_norm": 53.04520797729492, + "learning_rate": 5e-06, + "loss": 0.8486, + "num_input_tokens_seen": 554881260, + "step": 4393 + }, + { + "epoch": 1.1269159238119668, + "loss": 0.7891746163368225, + "loss_ce": 0.00011215827544219792, + "loss_iou": 0.380859375, + "loss_num": 0.00537109375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 554881260, + "step": 4393 + }, + { + "epoch": 1.1271724491759123, + "grad_norm": 24.915786743164062, + "learning_rate": 5e-06, + "loss": 0.8125, + "num_input_tokens_seen": 555008056, + "step": 4394 + }, + { + "epoch": 1.1271724491759123, + "loss": 0.8816965222358704, + "loss_ce": 0.0010812953114509583, + "loss_iou": 0.3984375, + "loss_num": 0.0166015625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 555008056, + "step": 4394 + }, + { + "epoch": 1.1274289745398576, + "grad_norm": 162.7960968017578, + "learning_rate": 5e-06, + "loss": 0.8329, + "num_input_tokens_seen": 555135048, + "step": 4395 + }, + { + "epoch": 1.1274289745398576, + "loss": 0.8506788015365601, + "loss_ce": 9.289111767429858e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00701904296875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 555135048, + "step": 4395 + }, + { + "epoch": 1.127685499903803, + "grad_norm": 34.88717269897461, + "learning_rate": 5e-06, + "loss": 0.9598, + "num_input_tokens_seen": 555261412, + "step": 4396 + }, + { + "epoch": 1.127685499903803, + "loss": 1.1063413619995117, + "loss_ce": 0.0013608213048428297, + "loss_iou": 0.5, + "loss_num": 0.02099609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 555261412, + "step": 4396 + }, + { + "epoch": 1.1279420252677483, + "grad_norm": 39.1777458190918, + "learning_rate": 5e-06, + "loss": 0.9162, + "num_input_tokens_seen": 555388296, + "step": 4397 + }, + { + "epoch": 1.1279420252677483, + "loss": 0.8866813778877258, + "loss_ce": 0.0006950302049517632, + "loss_iou": 0.41015625, + "loss_num": 0.01318359375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 555388296, + "step": 4397 + }, + { + "epoch": 1.1281985506316938, + "grad_norm": 37.644840240478516, + "learning_rate": 5e-06, + "loss": 0.8344, + "num_input_tokens_seen": 555514060, + "step": 4398 + }, + { + "epoch": 1.1281985506316938, + "loss": 1.0176427364349365, + "loss_ce": 0.002017771825194359, + "loss_iou": 0.453125, + "loss_num": 0.0216064453125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 555514060, + "step": 4398 + }, + { + "epoch": 1.128455075995639, + "grad_norm": 29.90350341796875, + "learning_rate": 5e-06, + "loss": 0.9394, + "num_input_tokens_seen": 555638844, + "step": 4399 + }, + { + "epoch": 1.128455075995639, + "loss": 0.8645536303520203, + "loss_ce": 0.0007840826874598861, + "loss_iou": 0.392578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 555638844, + "step": 4399 + }, + { + "epoch": 1.1287116013595844, + "grad_norm": 23.48908042907715, + "learning_rate": 5e-06, + "loss": 0.9904, + "num_input_tokens_seen": 555765948, + "step": 4400 + }, + { + "epoch": 1.1287116013595844, + "loss": 0.8629847764968872, + "loss_ce": 0.002877362072467804, + "loss_iou": 0.3984375, + "loss_num": 0.01287841796875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 555765948, + "step": 4400 + }, + { + "epoch": 1.1289681267235299, + "grad_norm": 36.1854133605957, + "learning_rate": 5e-06, + "loss": 0.801, + "num_input_tokens_seen": 555892572, + "step": 4401 + }, + { + "epoch": 1.1289681267235299, + "loss": 0.651606559753418, + "loss_ce": 0.0012159384787082672, + "loss_iou": 0.30859375, + "loss_num": 0.006317138671875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 555892572, + "step": 4401 + }, + { + "epoch": 1.1292246520874751, + "grad_norm": 47.392578125, + "learning_rate": 5e-06, + "loss": 0.9113, + "num_input_tokens_seen": 556017728, + "step": 4402 + }, + { + "epoch": 1.1292246520874751, + "loss": 0.7943363189697266, + "loss_ce": 0.0011234241537749767, + "loss_iou": 0.361328125, + "loss_num": 0.01416015625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 556017728, + "step": 4402 + }, + { + "epoch": 1.1294811774514204, + "grad_norm": 40.92268753051758, + "learning_rate": 5e-06, + "loss": 0.8955, + "num_input_tokens_seen": 556143444, + "step": 4403 + }, + { + "epoch": 1.1294811774514204, + "loss": 0.8459879159927368, + "loss_ce": 0.00028476043371483684, + "loss_iou": 0.40625, + "loss_num": 0.006439208984375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 556143444, + "step": 4403 + }, + { + "epoch": 1.129737702815366, + "grad_norm": 21.690141677856445, + "learning_rate": 5e-06, + "loss": 0.9149, + "num_input_tokens_seen": 556270628, + "step": 4404 + }, + { + "epoch": 1.129737702815366, + "loss": 0.7214297652244568, + "loss_ce": 0.0004824622010346502, + "loss_iou": 0.33984375, + "loss_num": 0.00830078125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 556270628, + "step": 4404 + }, + { + "epoch": 1.1299942281793112, + "grad_norm": 26.732696533203125, + "learning_rate": 5e-06, + "loss": 0.7908, + "num_input_tokens_seen": 556397372, + "step": 4405 + }, + { + "epoch": 1.1299942281793112, + "loss": 0.790381908416748, + "loss_ce": 0.0008311310084536672, + "loss_iou": 0.373046875, + "loss_num": 0.0087890625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 556397372, + "step": 4405 + }, + { + "epoch": 1.1302507535432567, + "grad_norm": 23.73853302001953, + "learning_rate": 5e-06, + "loss": 0.857, + "num_input_tokens_seen": 556522372, + "step": 4406 + }, + { + "epoch": 1.1302507535432567, + "loss": 0.951476514339447, + "loss_ce": 0.00030466634780168533, + "loss_iou": 0.431640625, + "loss_num": 0.017333984375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 556522372, + "step": 4406 + }, + { + "epoch": 1.130507278907202, + "grad_norm": 32.32231903076172, + "learning_rate": 5e-06, + "loss": 0.9398, + "num_input_tokens_seen": 556648344, + "step": 4407 + }, + { + "epoch": 1.130507278907202, + "loss": 1.028932809829712, + "loss_ce": 0.00354212848469615, + "loss_iou": 0.478515625, + "loss_num": 0.01385498046875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 556648344, + "step": 4407 + }, + { + "epoch": 1.1307638042711474, + "grad_norm": 27.304912567138672, + "learning_rate": 5e-06, + "loss": 0.7536, + "num_input_tokens_seen": 556775648, + "step": 4408 + }, + { + "epoch": 1.1307638042711474, + "loss": 0.7329480648040771, + "loss_ce": 0.0002820421941578388, + "loss_iou": 0.3359375, + "loss_num": 0.01165771484375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 556775648, + "step": 4408 + }, + { + "epoch": 1.1310203296350927, + "grad_norm": 41.16486358642578, + "learning_rate": 5e-06, + "loss": 0.9261, + "num_input_tokens_seen": 556903264, + "step": 4409 + }, + { + "epoch": 1.1310203296350927, + "loss": 1.2765600681304932, + "loss_ce": 0.001169364433735609, + "loss_iou": 0.56640625, + "loss_num": 0.029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 556903264, + "step": 4409 + }, + { + "epoch": 1.131276854999038, + "grad_norm": 51.45564651489258, + "learning_rate": 5e-06, + "loss": 0.8066, + "num_input_tokens_seen": 557028928, + "step": 4410 + }, + { + "epoch": 1.131276854999038, + "loss": 0.8082805871963501, + "loss_ce": 0.005546221509575844, + "loss_iou": 0.37109375, + "loss_num": 0.01239013671875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 557028928, + "step": 4410 + }, + { + "epoch": 1.1315333803629835, + "grad_norm": 45.31657409667969, + "learning_rate": 5e-06, + "loss": 0.9209, + "num_input_tokens_seen": 557155300, + "step": 4411 + }, + { + "epoch": 1.1315333803629835, + "loss": 0.908155620098114, + "loss_ce": 0.0019056496676057577, + "loss_iou": 0.4140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 557155300, + "step": 4411 + }, + { + "epoch": 1.1317899057269287, + "grad_norm": 57.716121673583984, + "learning_rate": 5e-06, + "loss": 0.8792, + "num_input_tokens_seen": 557280880, + "step": 4412 + }, + { + "epoch": 1.1317899057269287, + "loss": 0.8125715851783752, + "loss_ce": 0.002024728339165449, + "loss_iou": 0.37109375, + "loss_num": 0.0137939453125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 557280880, + "step": 4412 + }, + { + "epoch": 1.132046431090874, + "grad_norm": 42.386470794677734, + "learning_rate": 5e-06, + "loss": 0.8834, + "num_input_tokens_seen": 557406952, + "step": 4413 + }, + { + "epoch": 1.132046431090874, + "loss": 0.994838297367096, + "loss_ce": 0.0036273705773055553, + "loss_iou": 0.447265625, + "loss_num": 0.01904296875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 557406952, + "step": 4413 + }, + { + "epoch": 1.1323029564548195, + "grad_norm": 32.808624267578125, + "learning_rate": 5e-06, + "loss": 0.8203, + "num_input_tokens_seen": 557531964, + "step": 4414 + }, + { + "epoch": 1.1323029564548195, + "loss": 0.7963255643844604, + "loss_ce": 0.0011595336254686117, + "loss_iou": 0.37890625, + "loss_num": 0.00732421875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 557531964, + "step": 4414 + }, + { + "epoch": 1.1325594818187648, + "grad_norm": 33.4691047668457, + "learning_rate": 5e-06, + "loss": 0.9026, + "num_input_tokens_seen": 557658032, + "step": 4415 + }, + { + "epoch": 1.1325594818187648, + "loss": 0.7694133520126343, + "loss_ce": 0.00037034720298834145, + "loss_iou": 0.3515625, + "loss_num": 0.0133056640625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 557658032, + "step": 4415 + }, + { + "epoch": 1.1328160071827102, + "grad_norm": 55.896968841552734, + "learning_rate": 5e-06, + "loss": 0.8796, + "num_input_tokens_seen": 557785212, + "step": 4416 + }, + { + "epoch": 1.1328160071827102, + "loss": 0.9487991333007812, + "loss_ce": 0.0003128039534203708, + "loss_iou": 0.453125, + "loss_num": 0.008544921875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 557785212, + "step": 4416 + }, + { + "epoch": 1.1330725325466555, + "grad_norm": 52.81481170654297, + "learning_rate": 5e-06, + "loss": 0.9621, + "num_input_tokens_seen": 557911936, + "step": 4417 + }, + { + "epoch": 1.1330725325466555, + "loss": 1.0358659029006958, + "loss_ce": 0.00022133860329631716, + "loss_iou": 0.474609375, + "loss_num": 0.017333984375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 557911936, + "step": 4417 + }, + { + "epoch": 1.133329057910601, + "grad_norm": 34.914764404296875, + "learning_rate": 5e-06, + "loss": 0.9367, + "num_input_tokens_seen": 558039780, + "step": 4418 + }, + { + "epoch": 1.133329057910601, + "loss": 0.8766589760780334, + "loss_ce": 0.0006824140436947346, + "loss_iou": 0.404296875, + "loss_num": 0.01324462890625, + "loss_xval": 0.875, + "num_input_tokens_seen": 558039780, + "step": 4418 + }, + { + "epoch": 1.1335855832745463, + "grad_norm": 47.31360626220703, + "learning_rate": 5e-06, + "loss": 0.9257, + "num_input_tokens_seen": 558165848, + "step": 4419 + }, + { + "epoch": 1.1335855832745463, + "loss": 0.7454118728637695, + "loss_ce": 0.0012712456518784165, + "loss_iou": 0.345703125, + "loss_num": 0.0107421875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 558165848, + "step": 4419 + }, + { + "epoch": 1.1338421086384916, + "grad_norm": 59.62784957885742, + "learning_rate": 5e-06, + "loss": 0.8289, + "num_input_tokens_seen": 558292360, + "step": 4420 + }, + { + "epoch": 1.1338421086384916, + "loss": 0.8426828384399414, + "loss_ce": 0.0023508346639573574, + "loss_iou": 0.388671875, + "loss_num": 0.01239013671875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 558292360, + "step": 4420 + }, + { + "epoch": 1.134098634002437, + "grad_norm": 50.734474182128906, + "learning_rate": 5e-06, + "loss": 0.8246, + "num_input_tokens_seen": 558418764, + "step": 4421 + }, + { + "epoch": 1.134098634002437, + "loss": 0.8006922006607056, + "loss_ce": 0.0008874908089637756, + "loss_iou": 0.3671875, + "loss_num": 0.01300048828125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 558418764, + "step": 4421 + }, + { + "epoch": 1.1343551593663823, + "grad_norm": 38.48472213745117, + "learning_rate": 5e-06, + "loss": 0.9217, + "num_input_tokens_seen": 558544664, + "step": 4422 + }, + { + "epoch": 1.1343551593663823, + "loss": 0.8721931576728821, + "loss_ce": 0.001099409768357873, + "loss_iou": 0.39453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 558544664, + "step": 4422 + }, + { + "epoch": 1.1346116847303278, + "grad_norm": 33.49497985839844, + "learning_rate": 5e-06, + "loss": 0.9448, + "num_input_tokens_seen": 558669664, + "step": 4423 + }, + { + "epoch": 1.1346116847303278, + "loss": 1.0355424880981445, + "loss_ce": 0.0011186428600922227, + "loss_iou": 0.482421875, + "loss_num": 0.0140380859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 558669664, + "step": 4423 + }, + { + "epoch": 1.134868210094273, + "grad_norm": 29.18581199645996, + "learning_rate": 5e-06, + "loss": 0.7851, + "num_input_tokens_seen": 558795288, + "step": 4424 + }, + { + "epoch": 1.134868210094273, + "loss": 0.745862603187561, + "loss_ce": 0.0002570961369201541, + "loss_iou": 0.34765625, + "loss_num": 0.010009765625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 558795288, + "step": 4424 + }, + { + "epoch": 1.1351247354582183, + "grad_norm": 33.792850494384766, + "learning_rate": 5e-06, + "loss": 0.8425, + "num_input_tokens_seen": 558921084, + "step": 4425 + }, + { + "epoch": 1.1351247354582183, + "loss": 0.677505612373352, + "loss_ce": 0.0002594981633592397, + "loss_iou": 0.326171875, + "loss_num": 0.005218505859375, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 558921084, + "step": 4425 + }, + { + "epoch": 1.1353812608221638, + "grad_norm": 46.56937026977539, + "learning_rate": 5e-06, + "loss": 0.8168, + "num_input_tokens_seen": 559048396, + "step": 4426 + }, + { + "epoch": 1.1353812608221638, + "loss": 0.7433246970176697, + "loss_ce": 0.0035785753279924393, + "loss_iou": 0.349609375, + "loss_num": 0.0078125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 559048396, + "step": 4426 + }, + { + "epoch": 1.135637786186109, + "grad_norm": 41.06705093383789, + "learning_rate": 5e-06, + "loss": 0.9126, + "num_input_tokens_seen": 559174348, + "step": 4427 + }, + { + "epoch": 1.135637786186109, + "loss": 1.0798604488372803, + "loss_ce": 0.0002707089006435126, + "loss_iou": 0.5078125, + "loss_num": 0.0135498046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 559174348, + "step": 4427 + }, + { + "epoch": 1.1358943115500546, + "grad_norm": 40.635093688964844, + "learning_rate": 5e-06, + "loss": 0.8407, + "num_input_tokens_seen": 559302100, + "step": 4428 + }, + { + "epoch": 1.1358943115500546, + "loss": 0.8367603421211243, + "loss_ce": 0.0010670038172975183, + "loss_iou": 0.388671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 559302100, + "step": 4428 + }, + { + "epoch": 1.1361508369139999, + "grad_norm": 42.51997375488281, + "learning_rate": 5e-06, + "loss": 0.9438, + "num_input_tokens_seen": 559428152, + "step": 4429 + }, + { + "epoch": 1.1361508369139999, + "loss": 1.183257818222046, + "loss_ce": 0.0011289167450740933, + "loss_iou": 0.515625, + "loss_num": 0.02978515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 559428152, + "step": 4429 + }, + { + "epoch": 1.1364073622779451, + "grad_norm": 41.86601638793945, + "learning_rate": 5e-06, + "loss": 0.8697, + "num_input_tokens_seen": 559554584, + "step": 4430 + }, + { + "epoch": 1.1364073622779451, + "loss": 0.7875471115112305, + "loss_ce": 0.0014142829459160566, + "loss_iou": 0.369140625, + "loss_num": 0.009521484375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 559554584, + "step": 4430 + }, + { + "epoch": 1.1366638876418906, + "grad_norm": 45.27227020263672, + "learning_rate": 5e-06, + "loss": 0.8625, + "num_input_tokens_seen": 559680384, + "step": 4431 + }, + { + "epoch": 1.1366638876418906, + "loss": 0.8791888952255249, + "loss_ce": 0.0005267518572509289, + "loss_iou": 0.392578125, + "loss_num": 0.018310546875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 559680384, + "step": 4431 + }, + { + "epoch": 1.136920413005836, + "grad_norm": 36.407752990722656, + "learning_rate": 5e-06, + "loss": 0.9386, + "num_input_tokens_seen": 559804936, + "step": 4432 + }, + { + "epoch": 1.136920413005836, + "loss": 0.9912967681884766, + "loss_ce": 0.002771340310573578, + "loss_iou": 0.44140625, + "loss_num": 0.021484375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 559804936, + "step": 4432 + }, + { + "epoch": 1.1371769383697814, + "grad_norm": 42.199607849121094, + "learning_rate": 5e-06, + "loss": 0.8679, + "num_input_tokens_seen": 559931020, + "step": 4433 + }, + { + "epoch": 1.1371769383697814, + "loss": 0.9299167394638062, + "loss_ce": 0.002182358643040061, + "loss_iou": 0.42578125, + "loss_num": 0.01556396484375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 559931020, + "step": 4433 + }, + { + "epoch": 1.1374334637337267, + "grad_norm": 81.89493560791016, + "learning_rate": 5e-06, + "loss": 0.8914, + "num_input_tokens_seen": 560057192, + "step": 4434 + }, + { + "epoch": 1.1374334637337267, + "loss": 0.8806267380714417, + "loss_ce": 0.0017204806208610535, + "loss_iou": 0.408203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 560057192, + "step": 4434 + }, + { + "epoch": 1.137689989097672, + "grad_norm": 40.13206100463867, + "learning_rate": 5e-06, + "loss": 0.8605, + "num_input_tokens_seen": 560183524, + "step": 4435 + }, + { + "epoch": 1.137689989097672, + "loss": 0.860345721244812, + "loss_ce": 0.0014589800266548991, + "loss_iou": 0.404296875, + "loss_num": 0.0101318359375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 560183524, + "step": 4435 + }, + { + "epoch": 1.1379465144616174, + "grad_norm": 25.544431686401367, + "learning_rate": 5e-06, + "loss": 0.9067, + "num_input_tokens_seen": 560309308, + "step": 4436 + }, + { + "epoch": 1.1379465144616174, + "loss": 0.8781530857086182, + "loss_ce": 0.00022340784198604524, + "loss_iou": 0.412109375, + "loss_num": 0.0108642578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 560309308, + "step": 4436 + }, + { + "epoch": 1.1382030398255627, + "grad_norm": 32.66789245605469, + "learning_rate": 5e-06, + "loss": 0.9011, + "num_input_tokens_seen": 560435688, + "step": 4437 + }, + { + "epoch": 1.1382030398255627, + "loss": 0.9783151149749756, + "loss_ce": 0.0022408643271774054, + "loss_iou": 0.453125, + "loss_num": 0.013671875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 560435688, + "step": 4437 + }, + { + "epoch": 1.1384595651895082, + "grad_norm": 51.138572692871094, + "learning_rate": 5e-06, + "loss": 0.7268, + "num_input_tokens_seen": 560561572, + "step": 4438 + }, + { + "epoch": 1.1384595651895082, + "loss": 1.0364906787872314, + "loss_ce": 0.0027991621755063534, + "loss_iou": 0.474609375, + "loss_num": 0.0169677734375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 560561572, + "step": 4438 + }, + { + "epoch": 1.1387160905534535, + "grad_norm": 44.8787841796875, + "learning_rate": 5e-06, + "loss": 0.8043, + "num_input_tokens_seen": 560688320, + "step": 4439 + }, + { + "epoch": 1.1387160905534535, + "loss": 0.9539803266525269, + "loss_ce": 0.0003670936275739223, + "loss_iou": 0.44140625, + "loss_num": 0.01458740234375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 560688320, + "step": 4439 + }, + { + "epoch": 1.1389726159173987, + "grad_norm": 37.48916244506836, + "learning_rate": 5e-06, + "loss": 0.8264, + "num_input_tokens_seen": 560814428, + "step": 4440 + }, + { + "epoch": 1.1389726159173987, + "loss": 0.7381656169891357, + "loss_ce": 0.0006168429972603917, + "loss_iou": 0.34765625, + "loss_num": 0.0081787109375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 560814428, + "step": 4440 + }, + { + "epoch": 1.1392291412813442, + "grad_norm": 42.377132415771484, + "learning_rate": 5e-06, + "loss": 0.8684, + "num_input_tokens_seen": 560940840, + "step": 4441 + }, + { + "epoch": 1.1392291412813442, + "loss": 0.8824979662895203, + "loss_ce": 0.0013944649836048484, + "loss_iou": 0.39453125, + "loss_num": 0.0185546875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 560940840, + "step": 4441 + }, + { + "epoch": 1.1394856666452895, + "grad_norm": 36.90537643432617, + "learning_rate": 5e-06, + "loss": 0.8099, + "num_input_tokens_seen": 561067712, + "step": 4442 + }, + { + "epoch": 1.1394856666452895, + "loss": 0.8814148306846619, + "loss_ce": 0.0010437555611133575, + "loss_iou": 0.41015625, + "loss_num": 0.01165771484375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 561067712, + "step": 4442 + }, + { + "epoch": 1.139742192009235, + "grad_norm": 40.15196228027344, + "learning_rate": 5e-06, + "loss": 0.9104, + "num_input_tokens_seen": 561194304, + "step": 4443 + }, + { + "epoch": 1.139742192009235, + "loss": 0.8533523678779602, + "loss_ce": 0.0003250593435950577, + "loss_iou": 0.40234375, + "loss_num": 0.0098876953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 561194304, + "step": 4443 + }, + { + "epoch": 1.1399987173731803, + "grad_norm": 41.93926239013672, + "learning_rate": 5e-06, + "loss": 0.8838, + "num_input_tokens_seen": 561320660, + "step": 4444 + }, + { + "epoch": 1.1399987173731803, + "loss": 0.7890812158584595, + "loss_ce": 0.001727710710838437, + "loss_iou": 0.365234375, + "loss_num": 0.01141357421875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 561320660, + "step": 4444 + }, + { + "epoch": 1.1402552427371258, + "grad_norm": 37.9354248046875, + "learning_rate": 5e-06, + "loss": 0.8466, + "num_input_tokens_seen": 561446632, + "step": 4445 + }, + { + "epoch": 1.1402552427371258, + "loss": 0.8583638668060303, + "loss_ce": 0.00020953506464138627, + "loss_iou": 0.384765625, + "loss_num": 0.0177001953125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 561446632, + "step": 4445 + }, + { + "epoch": 1.140511768101071, + "grad_norm": 35.330997467041016, + "learning_rate": 5e-06, + "loss": 0.8084, + "num_input_tokens_seen": 561572920, + "step": 4446 + }, + { + "epoch": 1.140511768101071, + "loss": 0.8883254528045654, + "loss_ce": 0.0030715276952832937, + "loss_iou": 0.400390625, + "loss_num": 0.016845703125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 561572920, + "step": 4446 + }, + { + "epoch": 1.1407682934650163, + "grad_norm": 40.6688346862793, + "learning_rate": 5e-06, + "loss": 0.797, + "num_input_tokens_seen": 561699828, + "step": 4447 + }, + { + "epoch": 1.1407682934650163, + "loss": 0.6675664782524109, + "loss_ce": 0.0008184141479432583, + "loss_iou": 0.298828125, + "loss_num": 0.013916015625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 561699828, + "step": 4447 + }, + { + "epoch": 1.1410248188289618, + "grad_norm": 43.41802215576172, + "learning_rate": 5e-06, + "loss": 0.8284, + "num_input_tokens_seen": 561826320, + "step": 4448 + }, + { + "epoch": 1.1410248188289618, + "loss": 0.8148727416992188, + "loss_ce": 0.0004196658555883914, + "loss_iou": 0.38671875, + "loss_num": 0.00830078125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 561826320, + "step": 4448 + }, + { + "epoch": 1.141281344192907, + "grad_norm": 31.469257354736328, + "learning_rate": 5e-06, + "loss": 0.7315, + "num_input_tokens_seen": 561951048, + "step": 4449 + }, + { + "epoch": 1.141281344192907, + "loss": 0.781965434551239, + "loss_ce": 0.0007154140621423721, + "loss_iou": 0.3671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 561951048, + "step": 4449 + }, + { + "epoch": 1.1415378695568523, + "grad_norm": 21.298030853271484, + "learning_rate": 5e-06, + "loss": 0.8441, + "num_input_tokens_seen": 562077540, + "step": 4450 + }, + { + "epoch": 1.1415378695568523, + "loss": 0.8850439786911011, + "loss_ce": 0.0027198060415685177, + "loss_iou": 0.408203125, + "loss_num": 0.01324462890625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 562077540, + "step": 4450 + }, + { + "epoch": 1.1417943949207978, + "grad_norm": 29.97330665588379, + "learning_rate": 5e-06, + "loss": 0.926, + "num_input_tokens_seen": 562203112, + "step": 4451 + }, + { + "epoch": 1.1417943949207978, + "loss": 0.8590042591094971, + "loss_ce": 0.0010940725915133953, + "loss_iou": 0.408203125, + "loss_num": 0.00872802734375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 562203112, + "step": 4451 + }, + { + "epoch": 1.142050920284743, + "grad_norm": 16.372140884399414, + "learning_rate": 5e-06, + "loss": 0.8398, + "num_input_tokens_seen": 562329796, + "step": 4452 + }, + { + "epoch": 1.142050920284743, + "loss": 0.880555272102356, + "loss_ce": 0.0006724511040374637, + "loss_iou": 0.412109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 562329796, + "step": 4452 + }, + { + "epoch": 1.1423074456486886, + "grad_norm": 29.026350021362305, + "learning_rate": 5e-06, + "loss": 0.8319, + "num_input_tokens_seen": 562455572, + "step": 4453 + }, + { + "epoch": 1.1423074456486886, + "loss": 0.7127703428268433, + "loss_ce": 0.008180531673133373, + "loss_iou": 0.33984375, + "loss_num": 0.004852294921875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 562455572, + "step": 4453 + }, + { + "epoch": 1.1425639710126338, + "grad_norm": 42.253849029541016, + "learning_rate": 5e-06, + "loss": 0.866, + "num_input_tokens_seen": 562582028, + "step": 4454 + }, + { + "epoch": 1.1425639710126338, + "loss": 0.8493388891220093, + "loss_ce": 0.00021777946676593274, + "loss_iou": 0.40234375, + "loss_num": 0.009033203125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 562582028, + "step": 4454 + }, + { + "epoch": 1.1428204963765793, + "grad_norm": 116.01903533935547, + "learning_rate": 5e-06, + "loss": 0.8205, + "num_input_tokens_seen": 562708652, + "step": 4455 + }, + { + "epoch": 1.1428204963765793, + "loss": 0.8533685207366943, + "loss_ce": 0.0013176830252632499, + "loss_iou": 0.38671875, + "loss_num": 0.015380859375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 562708652, + "step": 4455 + }, + { + "epoch": 1.1430770217405246, + "grad_norm": 50.579341888427734, + "learning_rate": 5e-06, + "loss": 0.9816, + "num_input_tokens_seen": 562835348, + "step": 4456 + }, + { + "epoch": 1.1430770217405246, + "loss": 1.0645085573196411, + "loss_ce": 0.0010319515131413937, + "loss_iou": 0.486328125, + "loss_num": 0.01806640625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 562835348, + "step": 4456 + }, + { + "epoch": 1.1433335471044699, + "grad_norm": 36.5203971862793, + "learning_rate": 5e-06, + "loss": 0.8091, + "num_input_tokens_seen": 562962920, + "step": 4457 + }, + { + "epoch": 1.1433335471044699, + "loss": 0.9490381479263306, + "loss_ce": 0.0017725086072459817, + "loss_iou": 0.427734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 562962920, + "step": 4457 + }, + { + "epoch": 1.1435900724684154, + "grad_norm": 43.22640609741211, + "learning_rate": 5e-06, + "loss": 0.7978, + "num_input_tokens_seen": 563089332, + "step": 4458 + }, + { + "epoch": 1.1435900724684154, + "loss": 0.8668995499610901, + "loss_ce": 0.0002003289555432275, + "loss_iou": 0.404296875, + "loss_num": 0.01171875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 563089332, + "step": 4458 + }, + { + "epoch": 1.1438465978323606, + "grad_norm": 43.997127532958984, + "learning_rate": 5e-06, + "loss": 0.7852, + "num_input_tokens_seen": 563215036, + "step": 4459 + }, + { + "epoch": 1.1438465978323606, + "loss": 0.7188260555267334, + "loss_ce": 0.0015408790204674006, + "loss_iou": 0.34765625, + "loss_num": 0.00445556640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 563215036, + "step": 4459 + }, + { + "epoch": 1.144103123196306, + "grad_norm": 51.04657745361328, + "learning_rate": 5e-06, + "loss": 0.8408, + "num_input_tokens_seen": 563341140, + "step": 4460 + }, + { + "epoch": 1.144103123196306, + "loss": 1.0377075672149658, + "loss_ce": 0.0010864771902561188, + "loss_iou": 0.482421875, + "loss_num": 0.0145263671875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 563341140, + "step": 4460 + }, + { + "epoch": 1.1443596485602514, + "grad_norm": 55.272396087646484, + "learning_rate": 5e-06, + "loss": 0.9946, + "num_input_tokens_seen": 563467016, + "step": 4461 + }, + { + "epoch": 1.1443596485602514, + "loss": 0.9771977663040161, + "loss_ce": 0.00039115382242016494, + "loss_iou": 0.45703125, + "loss_num": 0.01214599609375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 563467016, + "step": 4461 + }, + { + "epoch": 1.1446161739241967, + "grad_norm": 45.25038528442383, + "learning_rate": 5e-06, + "loss": 0.7953, + "num_input_tokens_seen": 563594052, + "step": 4462 + }, + { + "epoch": 1.1446161739241967, + "loss": 0.7985683679580688, + "loss_ce": 0.0012050714576616883, + "loss_iou": 0.375, + "loss_num": 0.00927734375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 563594052, + "step": 4462 + }, + { + "epoch": 1.1448726992881422, + "grad_norm": 37.15034866333008, + "learning_rate": 5e-06, + "loss": 0.8943, + "num_input_tokens_seen": 563719560, + "step": 4463 + }, + { + "epoch": 1.1448726992881422, + "loss": 0.9501623511314392, + "loss_ce": 0.00045529319322668016, + "loss_iou": 0.416015625, + "loss_num": 0.0235595703125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 563719560, + "step": 4463 + }, + { + "epoch": 1.1451292246520874, + "grad_norm": 44.28472900390625, + "learning_rate": 5e-06, + "loss": 0.8514, + "num_input_tokens_seen": 563845848, + "step": 4464 + }, + { + "epoch": 1.1451292246520874, + "loss": 0.7816656827926636, + "loss_ce": 0.0013922583311796188, + "loss_iou": 0.361328125, + "loss_num": 0.01177978515625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 563845848, + "step": 4464 + }, + { + "epoch": 1.145385750016033, + "grad_norm": 59.25954055786133, + "learning_rate": 5e-06, + "loss": 0.8846, + "num_input_tokens_seen": 563971700, + "step": 4465 + }, + { + "epoch": 1.145385750016033, + "loss": 0.9880316257476807, + "loss_ce": 0.0014594256645068526, + "loss_iou": 0.45703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 563971700, + "step": 4465 + }, + { + "epoch": 1.1456422753799782, + "grad_norm": 42.791873931884766, + "learning_rate": 5e-06, + "loss": 0.9027, + "num_input_tokens_seen": 564098844, + "step": 4466 + }, + { + "epoch": 1.1456422753799782, + "loss": 0.9128975868225098, + "loss_ce": 0.0010324051836505532, + "loss_iou": 0.400390625, + "loss_num": 0.0220947265625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 564098844, + "step": 4466 + }, + { + "epoch": 1.1458988007439235, + "grad_norm": 36.11970138549805, + "learning_rate": 5e-06, + "loss": 0.7329, + "num_input_tokens_seen": 564224832, + "step": 4467 + }, + { + "epoch": 1.1458988007439235, + "loss": 0.7026975750923157, + "loss_ce": 0.0007932375301606953, + "loss_iou": 0.328125, + "loss_num": 0.0087890625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 564224832, + "step": 4467 + }, + { + "epoch": 1.146155326107869, + "grad_norm": 42.857234954833984, + "learning_rate": 5e-06, + "loss": 0.8345, + "num_input_tokens_seen": 564351184, + "step": 4468 + }, + { + "epoch": 1.146155326107869, + "loss": 0.6153695583343506, + "loss_ce": 0.00013519487401936203, + "loss_iou": 0.298828125, + "loss_num": 0.0038604736328125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 564351184, + "step": 4468 + }, + { + "epoch": 1.1464118514718142, + "grad_norm": 36.790096282958984, + "learning_rate": 5e-06, + "loss": 0.7609, + "num_input_tokens_seen": 564475868, + "step": 4469 + }, + { + "epoch": 1.1464118514718142, + "loss": 0.66095370054245, + "loss_ce": 0.0010415579890832305, + "loss_iou": 0.318359375, + "loss_num": 0.004791259765625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 564475868, + "step": 4469 + }, + { + "epoch": 1.1466683768357597, + "grad_norm": 62.581661224365234, + "learning_rate": 5e-06, + "loss": 0.8514, + "num_input_tokens_seen": 564603108, + "step": 4470 + }, + { + "epoch": 1.1466683768357597, + "loss": 1.0341262817382812, + "loss_ce": 0.001899718539789319, + "loss_iou": 0.4765625, + "loss_num": 0.0155029296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 564603108, + "step": 4470 + }, + { + "epoch": 1.146924902199705, + "grad_norm": 50.36272048950195, + "learning_rate": 5e-06, + "loss": 0.8329, + "num_input_tokens_seen": 564730036, + "step": 4471 + }, + { + "epoch": 1.146924902199705, + "loss": 0.9419748783111572, + "loss_ce": 0.0005686100339516997, + "loss_iou": 0.42578125, + "loss_num": 0.017578125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 564730036, + "step": 4471 + }, + { + "epoch": 1.1471814275636503, + "grad_norm": 42.16992950439453, + "learning_rate": 5e-06, + "loss": 1.0065, + "num_input_tokens_seen": 564855300, + "step": 4472 + }, + { + "epoch": 1.1471814275636503, + "loss": 0.8988373279571533, + "loss_ce": 0.004061905667185783, + "loss_iou": 0.4140625, + "loss_num": 0.01312255859375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 564855300, + "step": 4472 + }, + { + "epoch": 1.1474379529275958, + "grad_norm": 52.223533630371094, + "learning_rate": 5e-06, + "loss": 0.8039, + "num_input_tokens_seen": 564981580, + "step": 4473 + }, + { + "epoch": 1.1474379529275958, + "loss": 0.6878585815429688, + "loss_ce": 0.00011441703099990264, + "loss_iou": 0.3203125, + "loss_num": 0.0093994140625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 564981580, + "step": 4473 + }, + { + "epoch": 1.147694478291541, + "grad_norm": 52.660133361816406, + "learning_rate": 5e-06, + "loss": 0.9209, + "num_input_tokens_seen": 565109268, + "step": 4474 + }, + { + "epoch": 1.147694478291541, + "loss": 0.7826834917068481, + "loss_ce": 0.0002127476327586919, + "loss_iou": 0.365234375, + "loss_num": 0.01025390625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 565109268, + "step": 4474 + }, + { + "epoch": 1.1479510036554865, + "grad_norm": 39.854496002197266, + "learning_rate": 5e-06, + "loss": 0.8417, + "num_input_tokens_seen": 565235052, + "step": 4475 + }, + { + "epoch": 1.1479510036554865, + "loss": 0.9808903932571411, + "loss_ce": 0.0009099766612052917, + "loss_iou": 0.431640625, + "loss_num": 0.023193359375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 565235052, + "step": 4475 + }, + { + "epoch": 1.1482075290194318, + "grad_norm": 33.5964469909668, + "learning_rate": 5e-06, + "loss": 0.871, + "num_input_tokens_seen": 565362168, + "step": 4476 + }, + { + "epoch": 1.1482075290194318, + "loss": 0.9289098978042603, + "loss_ce": 0.002640355844050646, + "loss_iou": 0.431640625, + "loss_num": 0.0128173828125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 565362168, + "step": 4476 + }, + { + "epoch": 1.148464054383377, + "grad_norm": 28.817657470703125, + "learning_rate": 5e-06, + "loss": 0.9246, + "num_input_tokens_seen": 565490448, + "step": 4477 + }, + { + "epoch": 1.148464054383377, + "loss": 1.0202233791351318, + "loss_ce": 0.00020388190750963986, + "loss_iou": 0.443359375, + "loss_num": 0.0264892578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 565490448, + "step": 4477 + }, + { + "epoch": 1.1487205797473226, + "grad_norm": 30.86312484741211, + "learning_rate": 5e-06, + "loss": 0.8826, + "num_input_tokens_seen": 565616992, + "step": 4478 + }, + { + "epoch": 1.1487205797473226, + "loss": 0.7181052565574646, + "loss_ce": 0.00033183899358846247, + "loss_iou": 0.34375, + "loss_num": 0.006195068359375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 565616992, + "step": 4478 + }, + { + "epoch": 1.1489771051112678, + "grad_norm": 31.412593841552734, + "learning_rate": 5e-06, + "loss": 0.7784, + "num_input_tokens_seen": 565742968, + "step": 4479 + }, + { + "epoch": 1.1489771051112678, + "loss": 0.7363811731338501, + "loss_ce": 0.0015178981702774763, + "loss_iou": 0.34375, + "loss_num": 0.00921630859375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 565742968, + "step": 4479 + }, + { + "epoch": 1.1492336304752133, + "grad_norm": 35.64606857299805, + "learning_rate": 5e-06, + "loss": 0.887, + "num_input_tokens_seen": 565869020, + "step": 4480 + }, + { + "epoch": 1.1492336304752133, + "loss": 0.818684458732605, + "loss_ce": 8.09276825748384e-05, + "loss_iou": 0.392578125, + "loss_num": 0.006622314453125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 565869020, + "step": 4480 + }, + { + "epoch": 1.1494901558391586, + "grad_norm": 33.52762985229492, + "learning_rate": 5e-06, + "loss": 0.9468, + "num_input_tokens_seen": 565996772, + "step": 4481 + }, + { + "epoch": 1.1494901558391586, + "loss": 0.8973528146743774, + "loss_ce": 0.00038010289426892996, + "loss_iou": 0.419921875, + "loss_num": 0.010986328125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 565996772, + "step": 4481 + }, + { + "epoch": 1.149746681203104, + "grad_norm": 31.258670806884766, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 566122716, + "step": 4482 + }, + { + "epoch": 1.149746681203104, + "loss": 0.9564247131347656, + "loss_ce": 0.0008582413429394364, + "loss_iou": 0.4296875, + "loss_num": 0.0191650390625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 566122716, + "step": 4482 + }, + { + "epoch": 1.1500032065670494, + "grad_norm": 32.91709518432617, + "learning_rate": 5e-06, + "loss": 0.7186, + "num_input_tokens_seen": 566249224, + "step": 4483 + }, + { + "epoch": 1.1500032065670494, + "loss": 0.6349152326583862, + "loss_ce": 0.00014958748943172395, + "loss_iou": 0.296875, + "loss_num": 0.0081787109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 566249224, + "step": 4483 + }, + { + "epoch": 1.1502597319309946, + "grad_norm": 29.065040588378906, + "learning_rate": 5e-06, + "loss": 1.0251, + "num_input_tokens_seen": 566373656, + "step": 4484 + }, + { + "epoch": 1.1502597319309946, + "loss": 1.1403391361236572, + "loss_ce": 0.001667277654632926, + "loss_iou": 0.49609375, + "loss_num": 0.029296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 566373656, + "step": 4484 + }, + { + "epoch": 1.1505162572949401, + "grad_norm": 32.31103515625, + "learning_rate": 5e-06, + "loss": 0.8328, + "num_input_tokens_seen": 566500508, + "step": 4485 + }, + { + "epoch": 1.1505162572949401, + "loss": 0.8124203681945801, + "loss_ce": 0.00040864365291781723, + "loss_iou": 0.373046875, + "loss_num": 0.0135498046875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 566500508, + "step": 4485 + }, + { + "epoch": 1.1507727826588854, + "grad_norm": 45.648014068603516, + "learning_rate": 5e-06, + "loss": 0.8443, + "num_input_tokens_seen": 566626472, + "step": 4486 + }, + { + "epoch": 1.1507727826588854, + "loss": 0.8159573078155518, + "loss_ce": 0.0005275791045278311, + "loss_iou": 0.37109375, + "loss_num": 0.014892578125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 566626472, + "step": 4486 + }, + { + "epoch": 1.1510293080228307, + "grad_norm": 46.22900390625, + "learning_rate": 5e-06, + "loss": 0.8089, + "num_input_tokens_seen": 566752160, + "step": 4487 + }, + { + "epoch": 1.1510293080228307, + "loss": 0.8395812511444092, + "loss_ce": 0.0007140823872759938, + "loss_iou": 0.390625, + "loss_num": 0.01202392578125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 566752160, + "step": 4487 + }, + { + "epoch": 1.1512858333867761, + "grad_norm": 75.57136535644531, + "learning_rate": 5e-06, + "loss": 1.0162, + "num_input_tokens_seen": 566878636, + "step": 4488 + }, + { + "epoch": 1.1512858333867761, + "loss": 0.894891619682312, + "loss_ce": 0.0003603480872698128, + "loss_iou": 0.427734375, + "loss_num": 0.008056640625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 566878636, + "step": 4488 + }, + { + "epoch": 1.1515423587507214, + "grad_norm": 54.54248046875, + "learning_rate": 5e-06, + "loss": 0.8515, + "num_input_tokens_seen": 567006132, + "step": 4489 + }, + { + "epoch": 1.1515423587507214, + "loss": 0.7974320650100708, + "loss_ce": 0.0010453383438289165, + "loss_iou": 0.359375, + "loss_num": 0.0157470703125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 567006132, + "step": 4489 + }, + { + "epoch": 1.151798884114667, + "grad_norm": 53.7583122253418, + "learning_rate": 5e-06, + "loss": 0.7395, + "num_input_tokens_seen": 567132256, + "step": 4490 + }, + { + "epoch": 1.151798884114667, + "loss": 0.7229307889938354, + "loss_ce": 0.0007628169259987772, + "loss_iou": 0.330078125, + "loss_num": 0.0120849609375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 567132256, + "step": 4490 + }, + { + "epoch": 1.1520554094786122, + "grad_norm": 45.05324172973633, + "learning_rate": 5e-06, + "loss": 0.8284, + "num_input_tokens_seen": 567257424, + "step": 4491 + }, + { + "epoch": 1.1520554094786122, + "loss": 0.719456672668457, + "loss_ce": 0.0007066383259370923, + "loss_iou": 0.341796875, + "loss_num": 0.006683349609375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 567257424, + "step": 4491 + }, + { + "epoch": 1.1523119348425577, + "grad_norm": 46.05257034301758, + "learning_rate": 5e-06, + "loss": 0.7932, + "num_input_tokens_seen": 567383636, + "step": 4492 + }, + { + "epoch": 1.1523119348425577, + "loss": 0.8213576078414917, + "loss_ce": 0.0010451130801811814, + "loss_iou": 0.384765625, + "loss_num": 0.010498046875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 567383636, + "step": 4492 + }, + { + "epoch": 1.152568460206503, + "grad_norm": 41.408573150634766, + "learning_rate": 5e-06, + "loss": 0.7926, + "num_input_tokens_seen": 567508556, + "step": 4493 + }, + { + "epoch": 1.152568460206503, + "loss": 0.7170243263244629, + "loss_ce": 0.0007156881038099527, + "loss_iou": 0.33203125, + "loss_num": 0.01080322265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 567508556, + "step": 4493 + }, + { + "epoch": 1.1528249855704482, + "grad_norm": 43.28091812133789, + "learning_rate": 5e-06, + "loss": 0.7394, + "num_input_tokens_seen": 567634944, + "step": 4494 + }, + { + "epoch": 1.1528249855704482, + "loss": 0.6411104798316956, + "loss_ce": 0.0002413822803646326, + "loss_iou": 0.306640625, + "loss_num": 0.005157470703125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 567634944, + "step": 4494 + }, + { + "epoch": 1.1530815109343937, + "grad_norm": 53.84357833862305, + "learning_rate": 5e-06, + "loss": 0.9558, + "num_input_tokens_seen": 567760996, + "step": 4495 + }, + { + "epoch": 1.1530815109343937, + "loss": 0.8985980153083801, + "loss_ce": 0.0016253418289124966, + "loss_iou": 0.42578125, + "loss_num": 0.00946044921875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 567760996, + "step": 4495 + }, + { + "epoch": 1.153338036298339, + "grad_norm": 47.778160095214844, + "learning_rate": 5e-06, + "loss": 0.9394, + "num_input_tokens_seen": 567886968, + "step": 4496 + }, + { + "epoch": 1.153338036298339, + "loss": 0.9270901679992676, + "loss_ce": 0.000820667133666575, + "loss_iou": 0.435546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 567886968, + "step": 4496 + }, + { + "epoch": 1.1535945616622842, + "grad_norm": 51.297630310058594, + "learning_rate": 5e-06, + "loss": 0.8352, + "num_input_tokens_seen": 568013144, + "step": 4497 + }, + { + "epoch": 1.1535945616622842, + "loss": 0.9491879343986511, + "loss_ce": 0.0009457315900363028, + "loss_iou": 0.431640625, + "loss_num": 0.01708984375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 568013144, + "step": 4497 + }, + { + "epoch": 1.1538510870262297, + "grad_norm": 39.950984954833984, + "learning_rate": 5e-06, + "loss": 0.8372, + "num_input_tokens_seen": 568139476, + "step": 4498 + }, + { + "epoch": 1.1538510870262297, + "loss": 0.7550534009933472, + "loss_ce": 0.0023678604047745466, + "loss_iou": 0.349609375, + "loss_num": 0.01092529296875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 568139476, + "step": 4498 + }, + { + "epoch": 1.154107612390175, + "grad_norm": 44.750091552734375, + "learning_rate": 5e-06, + "loss": 0.8008, + "num_input_tokens_seen": 568266088, + "step": 4499 + }, + { + "epoch": 1.154107612390175, + "loss": 0.7812924981117249, + "loss_ce": 0.00028664572164416313, + "loss_iou": 0.365234375, + "loss_num": 0.0103759765625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 568266088, + "step": 4499 + }, + { + "epoch": 1.1543641377541205, + "grad_norm": 34.144317626953125, + "learning_rate": 5e-06, + "loss": 0.8615, + "num_input_tokens_seen": 568392016, + "step": 4500 + }, + { + "epoch": 1.1543641377541205, + "eval_icons_CIoU": 0.31648050248622894, + "eval_icons_GIoU": 0.2780092731118202, + "eval_icons_IoU": 0.4821384400129318, + "eval_icons_MAE_all": 0.020150872878730297, + "eval_icons_MAE_h": 0.028211926110088825, + "eval_icons_MAE_w": 0.04127909615635872, + "eval_icons_MAE_x_boxes": 0.04051684029400349, + "eval_icons_MAE_y_boxes": 0.028979620896279812, + "eval_icons_NUM_probability": 0.999811053276062, + "eval_icons_inside_bbox": 0.7395833432674408, + "eval_icons_loss": 1.5006988048553467, + "eval_icons_loss_ce": 6.302247493294999e-05, + "eval_icons_loss_iou": 0.68603515625, + "eval_icons_loss_num": 0.020051956176757812, + "eval_icons_loss_xval": 1.47265625, + "eval_icons_runtime": 45.347, + "eval_icons_samples_per_second": 1.103, + "eval_icons_steps_per_second": 0.044, + "num_input_tokens_seen": 568392016, + "step": 4500 + }, + { + "epoch": 1.1543641377541205, + "eval_screenspot_CIoU": 0.121437502404054, + "eval_screenspot_GIoU": 0.10200619076689084, + "eval_screenspot_IoU": 0.2918974955876668, + "eval_screenspot_MAE_all": 0.07808919499317805, + "eval_screenspot_MAE_h": 0.06975071256359418, + "eval_screenspot_MAE_w": 0.1278847207625707, + "eval_screenspot_MAE_x_boxes": 0.10676705092191696, + "eval_screenspot_MAE_y_boxes": 0.0545857734978199, + "eval_screenspot_NUM_probability": 0.99995090564092, + "eval_screenspot_inside_bbox": 0.6016666690508524, + "eval_screenspot_loss": 2.2424089908599854, + "eval_screenspot_loss_ce": 0.004168823594227433, + "eval_screenspot_loss_iou": 0.9310709635416666, + "eval_screenspot_loss_num": 0.08284505208333333, + "eval_screenspot_loss_xval": 2.2770182291666665, + "eval_screenspot_runtime": 77.8125, + "eval_screenspot_samples_per_second": 1.144, + "eval_screenspot_steps_per_second": 0.039, + "num_input_tokens_seen": 568392016, + "step": 4500 + }, + { + "epoch": 1.1543641377541205, + "loss": 2.2521250247955322, + "loss_ce": 0.002125152852386236, + "loss_iou": 0.94140625, + "loss_num": 0.07373046875, + "loss_xval": 2.25, + "num_input_tokens_seen": 568392016, + "step": 4500 + }, + { + "epoch": 1.1546206631180658, + "grad_norm": 37.628116607666016, + "learning_rate": 5e-06, + "loss": 0.9212, + "num_input_tokens_seen": 568518948, + "step": 4501 + }, + { + "epoch": 1.1546206631180658, + "loss": 0.9438918232917786, + "loss_ce": 0.001997299026697874, + "loss_iou": 0.43359375, + "loss_num": 0.015380859375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 568518948, + "step": 4501 + }, + { + "epoch": 1.1548771884820113, + "grad_norm": 47.65564727783203, + "learning_rate": 5e-06, + "loss": 0.9246, + "num_input_tokens_seen": 568645208, + "step": 4502 + }, + { + "epoch": 1.1548771884820113, + "loss": 0.8269538879394531, + "loss_ce": 0.00419996352866292, + "loss_iou": 0.369140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 568645208, + "step": 4502 + }, + { + "epoch": 1.1551337138459565, + "grad_norm": 52.776676177978516, + "learning_rate": 5e-06, + "loss": 0.8837, + "num_input_tokens_seen": 568772776, + "step": 4503 + }, + { + "epoch": 1.1551337138459565, + "loss": 0.978903591632843, + "loss_ce": 0.00038796901935711503, + "loss_iou": 0.4609375, + "loss_num": 0.01141357421875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 568772776, + "step": 4503 + }, + { + "epoch": 1.1553902392099018, + "grad_norm": 57.221492767333984, + "learning_rate": 5e-06, + "loss": 0.9338, + "num_input_tokens_seen": 568898528, + "step": 4504 + }, + { + "epoch": 1.1553902392099018, + "loss": 1.0634046792984009, + "loss_ce": 0.0009046964696608484, + "loss_iou": 0.470703125, + "loss_num": 0.0242919921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 568898528, + "step": 4504 + }, + { + "epoch": 1.1556467645738473, + "grad_norm": 48.78168869018555, + "learning_rate": 5e-06, + "loss": 0.8847, + "num_input_tokens_seen": 569025400, + "step": 4505 + }, + { + "epoch": 1.1556467645738473, + "loss": 0.9872300624847412, + "loss_ce": 0.00041369517566636205, + "loss_iou": 0.4453125, + "loss_num": 0.0196533203125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 569025400, + "step": 4505 + }, + { + "epoch": 1.1559032899377926, + "grad_norm": 49.437461853027344, + "learning_rate": 5e-06, + "loss": 0.9292, + "num_input_tokens_seen": 569151676, + "step": 4506 + }, + { + "epoch": 1.1559032899377926, + "loss": 0.8975967168807983, + "loss_ce": 0.00013573843170888722, + "loss_iou": 0.4140625, + "loss_num": 0.013916015625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 569151676, + "step": 4506 + }, + { + "epoch": 1.1561598153017378, + "grad_norm": 48.490631103515625, + "learning_rate": 5e-06, + "loss": 0.8247, + "num_input_tokens_seen": 569277772, + "step": 4507 + }, + { + "epoch": 1.1561598153017378, + "loss": 0.7595241069793701, + "loss_ce": 0.0007350355153903365, + "loss_iou": 0.36328125, + "loss_num": 0.0062255859375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 569277772, + "step": 4507 + }, + { + "epoch": 1.1564163406656833, + "grad_norm": 29.831327438354492, + "learning_rate": 5e-06, + "loss": 0.8537, + "num_input_tokens_seen": 569403812, + "step": 4508 + }, + { + "epoch": 1.1564163406656833, + "loss": 1.1168478727340698, + "loss_ce": 0.0025901086628437042, + "loss_iou": 0.49609375, + "loss_num": 0.02490234375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 569403812, + "step": 4508 + }, + { + "epoch": 1.1566728660296286, + "grad_norm": 28.618806838989258, + "learning_rate": 5e-06, + "loss": 0.919, + "num_input_tokens_seen": 569529644, + "step": 4509 + }, + { + "epoch": 1.1566728660296286, + "loss": 0.7928444147109985, + "loss_ce": 0.00036399037344381213, + "loss_iou": 0.376953125, + "loss_num": 0.007720947265625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 569529644, + "step": 4509 + }, + { + "epoch": 1.156929391393574, + "grad_norm": 23.060558319091797, + "learning_rate": 5e-06, + "loss": 0.8329, + "num_input_tokens_seen": 569655064, + "step": 4510 + }, + { + "epoch": 1.156929391393574, + "loss": 0.6823216676712036, + "loss_ce": 0.0006810561753809452, + "loss_iou": 0.330078125, + "loss_num": 0.00421142578125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 569655064, + "step": 4510 + }, + { + "epoch": 1.1571859167575194, + "grad_norm": 34.519962310791016, + "learning_rate": 5e-06, + "loss": 0.8859, + "num_input_tokens_seen": 569780348, + "step": 4511 + }, + { + "epoch": 1.1571859167575194, + "loss": 0.9675500392913818, + "loss_ce": 0.0024621710181236267, + "loss_iou": 0.435546875, + "loss_num": 0.019287109375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 569780348, + "step": 4511 + }, + { + "epoch": 1.1574424421214649, + "grad_norm": 37.16963195800781, + "learning_rate": 5e-06, + "loss": 0.8462, + "num_input_tokens_seen": 569906252, + "step": 4512 + }, + { + "epoch": 1.1574424421214649, + "loss": 0.8693113327026367, + "loss_ce": 0.0001707051123958081, + "loss_iou": 0.40625, + "loss_num": 0.01080322265625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 569906252, + "step": 4512 + }, + { + "epoch": 1.1576989674854101, + "grad_norm": 51.7944221496582, + "learning_rate": 5e-06, + "loss": 0.8765, + "num_input_tokens_seen": 570032548, + "step": 4513 + }, + { + "epoch": 1.1576989674854101, + "loss": 0.6732085943222046, + "loss_ce": 0.0006011518416926265, + "loss_iou": 0.322265625, + "loss_num": 0.00592041015625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 570032548, + "step": 4513 + }, + { + "epoch": 1.1579554928493554, + "grad_norm": 56.30427169799805, + "learning_rate": 5e-06, + "loss": 0.9273, + "num_input_tokens_seen": 570159364, + "step": 4514 + }, + { + "epoch": 1.1579554928493554, + "loss": 0.9967440366744995, + "loss_ce": 0.00016202114056795835, + "loss_iou": 0.453125, + "loss_num": 0.0181884765625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 570159364, + "step": 4514 + }, + { + "epoch": 1.1582120182133009, + "grad_norm": 19.279375076293945, + "learning_rate": 5e-06, + "loss": 0.8959, + "num_input_tokens_seen": 570284772, + "step": 4515 + }, + { + "epoch": 1.1582120182133009, + "loss": 0.8812190294265747, + "loss_ce": 0.0013362191384658217, + "loss_iou": 0.40625, + "loss_num": 0.01348876953125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 570284772, + "step": 4515 + }, + { + "epoch": 1.1584685435772462, + "grad_norm": 14.974258422851562, + "learning_rate": 5e-06, + "loss": 0.8063, + "num_input_tokens_seen": 570410576, + "step": 4516 + }, + { + "epoch": 1.1584685435772462, + "loss": 0.9165507555007935, + "loss_ce": 0.0002909849863499403, + "loss_iou": 0.421875, + "loss_num": 0.01470947265625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 570410576, + "step": 4516 + }, + { + "epoch": 1.1587250689411916, + "grad_norm": 19.54275894165039, + "learning_rate": 5e-06, + "loss": 0.8425, + "num_input_tokens_seen": 570537124, + "step": 4517 + }, + { + "epoch": 1.1587250689411916, + "loss": 0.7510870695114136, + "loss_ce": 0.0015753908082842827, + "loss_iou": 0.341796875, + "loss_num": 0.01361083984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 570537124, + "step": 4517 + }, + { + "epoch": 1.158981594305137, + "grad_norm": 23.062849044799805, + "learning_rate": 5e-06, + "loss": 0.8793, + "num_input_tokens_seen": 570662992, + "step": 4518 + }, + { + "epoch": 1.158981594305137, + "loss": 0.8020868301391602, + "loss_ce": 0.0005731126293540001, + "loss_iou": 0.369140625, + "loss_num": 0.01300048828125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 570662992, + "step": 4518 + }, + { + "epoch": 1.1592381196690822, + "grad_norm": 22.309118270874023, + "learning_rate": 5e-06, + "loss": 0.9284, + "num_input_tokens_seen": 570789376, + "step": 4519 + }, + { + "epoch": 1.1592381196690822, + "loss": 0.7335063815116882, + "loss_ce": 0.0010845317738130689, + "loss_iou": 0.345703125, + "loss_num": 0.00799560546875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 570789376, + "step": 4519 + }, + { + "epoch": 1.1594946450330277, + "grad_norm": 34.2843017578125, + "learning_rate": 5e-06, + "loss": 0.8957, + "num_input_tokens_seen": 570915372, + "step": 4520 + }, + { + "epoch": 1.1594946450330277, + "loss": 0.945745050907135, + "loss_ce": 0.002873985795304179, + "loss_iou": 0.435546875, + "loss_num": 0.01446533203125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 570915372, + "step": 4520 + }, + { + "epoch": 1.159751170396973, + "grad_norm": 25.52448272705078, + "learning_rate": 5e-06, + "loss": 0.8369, + "num_input_tokens_seen": 571041460, + "step": 4521 + }, + { + "epoch": 1.159751170396973, + "loss": 0.8421894907951355, + "loss_ce": 0.0003926500503439456, + "loss_iou": 0.392578125, + "loss_num": 0.010986328125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 571041460, + "step": 4521 + }, + { + "epoch": 1.1600076957609184, + "grad_norm": 35.809532165527344, + "learning_rate": 5e-06, + "loss": 0.8165, + "num_input_tokens_seen": 571168200, + "step": 4522 + }, + { + "epoch": 1.1600076957609184, + "loss": 0.9303097724914551, + "loss_ce": 0.003551972098648548, + "loss_iou": 0.421875, + "loss_num": 0.01708984375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 571168200, + "step": 4522 + }, + { + "epoch": 1.1602642211248637, + "grad_norm": 57.80194854736328, + "learning_rate": 5e-06, + "loss": 0.8622, + "num_input_tokens_seen": 571294844, + "step": 4523 + }, + { + "epoch": 1.1602642211248637, + "loss": 0.8874683380126953, + "loss_ce": 0.0002613542601466179, + "loss_iou": 0.40625, + "loss_num": 0.01483154296875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 571294844, + "step": 4523 + }, + { + "epoch": 1.160520746488809, + "grad_norm": 35.87123489379883, + "learning_rate": 5e-06, + "loss": 0.8668, + "num_input_tokens_seen": 571420404, + "step": 4524 + }, + { + "epoch": 1.160520746488809, + "loss": 0.9866613149642944, + "loss_ce": 0.0005773517768830061, + "loss_iou": 0.435546875, + "loss_num": 0.0230712890625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 571420404, + "step": 4524 + }, + { + "epoch": 1.1607772718527545, + "grad_norm": 40.02013397216797, + "learning_rate": 5e-06, + "loss": 0.8504, + "num_input_tokens_seen": 571547224, + "step": 4525 + }, + { + "epoch": 1.1607772718527545, + "loss": 0.8007362484931946, + "loss_ce": 7.708168413955718e-05, + "loss_iou": 0.373046875, + "loss_num": 0.01129150390625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 571547224, + "step": 4525 + }, + { + "epoch": 1.1610337972166997, + "grad_norm": 34.42779541015625, + "learning_rate": 5e-06, + "loss": 0.8155, + "num_input_tokens_seen": 571673760, + "step": 4526 + }, + { + "epoch": 1.1610337972166997, + "loss": 0.7085493803024292, + "loss_ce": 5.33264537807554e-05, + "loss_iou": 0.33984375, + "loss_num": 0.00555419921875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 571673760, + "step": 4526 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 42.717498779296875, + "learning_rate": 5e-06, + "loss": 0.808, + "num_input_tokens_seen": 571798876, + "step": 4527 + }, + { + "epoch": 1.1612903225806452, + "loss": 0.7098283767700195, + "loss_ce": 0.00011160923895658925, + "loss_iou": 0.3359375, + "loss_num": 0.007598876953125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 571798876, + "step": 4527 + }, + { + "epoch": 1.1615468479445905, + "grad_norm": 36.57347106933594, + "learning_rate": 5e-06, + "loss": 0.9772, + "num_input_tokens_seen": 571925472, + "step": 4528 + }, + { + "epoch": 1.1615468479445905, + "loss": 1.2090398073196411, + "loss_ce": 0.001032043481245637, + "loss_iou": 0.55078125, + "loss_num": 0.02099609375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 571925472, + "step": 4528 + }, + { + "epoch": 1.161803373308536, + "grad_norm": 32.93621826171875, + "learning_rate": 5e-06, + "loss": 0.8169, + "num_input_tokens_seen": 572051744, + "step": 4529 + }, + { + "epoch": 1.161803373308536, + "loss": 0.8676299452781677, + "loss_ce": 0.0011748457327485085, + "loss_iou": 0.3984375, + "loss_num": 0.01397705078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 572051744, + "step": 4529 + }, + { + "epoch": 1.1620598986724813, + "grad_norm": 42.37870788574219, + "learning_rate": 5e-06, + "loss": 0.8322, + "num_input_tokens_seen": 572178576, + "step": 4530 + }, + { + "epoch": 1.1620598986724813, + "loss": 0.7946346998214722, + "loss_ce": 0.001665959949605167, + "loss_iou": 0.359375, + "loss_num": 0.01513671875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 572178576, + "step": 4530 + }, + { + "epoch": 1.1623164240364265, + "grad_norm": 41.35211181640625, + "learning_rate": 5e-06, + "loss": 0.8361, + "num_input_tokens_seen": 572305588, + "step": 4531 + }, + { + "epoch": 1.1623164240364265, + "loss": 0.8207412958145142, + "loss_ce": 0.0009170278790406883, + "loss_iou": 0.38671875, + "loss_num": 0.00933837890625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 572305588, + "step": 4531 + }, + { + "epoch": 1.162572949400372, + "grad_norm": 36.05964660644531, + "learning_rate": 5e-06, + "loss": 0.9522, + "num_input_tokens_seen": 572431724, + "step": 4532 + }, + { + "epoch": 1.162572949400372, + "loss": 1.2159754037857056, + "loss_ce": 0.000643333769403398, + "loss_iou": 0.54296875, + "loss_num": 0.025634765625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 572431724, + "step": 4532 + }, + { + "epoch": 1.1628294747643173, + "grad_norm": 47.059852600097656, + "learning_rate": 5e-06, + "loss": 0.8094, + "num_input_tokens_seen": 572557224, + "step": 4533 + }, + { + "epoch": 1.1628294747643173, + "loss": 0.720871090888977, + "loss_ce": 0.0006562181515619159, + "loss_iou": 0.328125, + "loss_num": 0.01324462890625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 572557224, + "step": 4533 + }, + { + "epoch": 1.1630860001282626, + "grad_norm": 50.49955749511719, + "learning_rate": 5e-06, + "loss": 0.943, + "num_input_tokens_seen": 572683320, + "step": 4534 + }, + { + "epoch": 1.1630860001282626, + "loss": 0.9858728647232056, + "loss_ce": 0.0005212597898207605, + "loss_iou": 0.44921875, + "loss_num": 0.0174560546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 572683320, + "step": 4534 + }, + { + "epoch": 1.163342525492208, + "grad_norm": 43.80109786987305, + "learning_rate": 5e-06, + "loss": 0.8189, + "num_input_tokens_seen": 572809836, + "step": 4535 + }, + { + "epoch": 1.163342525492208, + "loss": 0.811183750629425, + "loss_ce": 0.0018575791036710143, + "loss_iou": 0.37109375, + "loss_num": 0.01318359375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 572809836, + "step": 4535 + }, + { + "epoch": 1.1635990508561533, + "grad_norm": 45.51151657104492, + "learning_rate": 5e-06, + "loss": 0.8344, + "num_input_tokens_seen": 572936640, + "step": 4536 + }, + { + "epoch": 1.1635990508561533, + "loss": 0.867432177066803, + "loss_ce": 0.00610403111204505, + "loss_iou": 0.404296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 572936640, + "step": 4536 + }, + { + "epoch": 1.1638555762200988, + "grad_norm": 47.10224151611328, + "learning_rate": 5e-06, + "loss": 0.7307, + "num_input_tokens_seen": 573063588, + "step": 4537 + }, + { + "epoch": 1.1638555762200988, + "loss": 0.7160865068435669, + "loss_ce": 0.0012427003821358085, + "loss_iou": 0.333984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 573063588, + "step": 4537 + }, + { + "epoch": 1.164112101584044, + "grad_norm": 58.58885192871094, + "learning_rate": 5e-06, + "loss": 0.9121, + "num_input_tokens_seen": 573190352, + "step": 4538 + }, + { + "epoch": 1.164112101584044, + "loss": 0.9367368221282959, + "loss_ce": 0.0007016891613602638, + "loss_iou": 0.439453125, + "loss_num": 0.0111083984375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 573190352, + "step": 4538 + }, + { + "epoch": 1.1643686269479896, + "grad_norm": 60.32862854003906, + "learning_rate": 5e-06, + "loss": 0.8151, + "num_input_tokens_seen": 573316952, + "step": 4539 + }, + { + "epoch": 1.1643686269479896, + "loss": 0.8755601644515991, + "loss_ce": 0.0005602054297924042, + "loss_iou": 0.3984375, + "loss_num": 0.015869140625, + "loss_xval": 0.875, + "num_input_tokens_seen": 573316952, + "step": 4539 + }, + { + "epoch": 1.1646251523119349, + "grad_norm": 46.5384635925293, + "learning_rate": 5e-06, + "loss": 0.8108, + "num_input_tokens_seen": 573442716, + "step": 4540 + }, + { + "epoch": 1.1646251523119349, + "loss": 1.064786434173584, + "loss_ce": 0.0017981571145355701, + "loss_iou": 0.47265625, + "loss_num": 0.0238037109375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 573442716, + "step": 4540 + }, + { + "epoch": 1.1648816776758801, + "grad_norm": 39.41315841674805, + "learning_rate": 5e-06, + "loss": 0.7988, + "num_input_tokens_seen": 573567552, + "step": 4541 + }, + { + "epoch": 1.1648816776758801, + "loss": 0.8795688152313232, + "loss_ce": 0.00017425825353711843, + "loss_iou": 0.4140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 573567552, + "step": 4541 + }, + { + "epoch": 1.1651382030398256, + "grad_norm": 46.57070541381836, + "learning_rate": 5e-06, + "loss": 0.9397, + "num_input_tokens_seen": 573694140, + "step": 4542 + }, + { + "epoch": 1.1651382030398256, + "loss": 1.1182448863983154, + "loss_ce": 0.0025222119875252247, + "loss_iou": 0.494140625, + "loss_num": 0.025390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 573694140, + "step": 4542 + }, + { + "epoch": 1.165394728403771, + "grad_norm": 43.3175163269043, + "learning_rate": 5e-06, + "loss": 0.7067, + "num_input_tokens_seen": 573819864, + "step": 4543 + }, + { + "epoch": 1.165394728403771, + "loss": 0.6012135744094849, + "loss_ce": 0.0001393805432599038, + "loss_iou": 0.291015625, + "loss_num": 0.0038909912109375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 573819864, + "step": 4543 + }, + { + "epoch": 1.1656512537677162, + "grad_norm": 49.64727783203125, + "learning_rate": 5e-06, + "loss": 0.8328, + "num_input_tokens_seen": 573946140, + "step": 4544 + }, + { + "epoch": 1.1656512537677162, + "loss": 0.7432665824890137, + "loss_ce": 0.0049853576347231865, + "loss_iou": 0.34375, + "loss_num": 0.01043701171875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 573946140, + "step": 4544 + }, + { + "epoch": 1.1659077791316617, + "grad_norm": 50.002723693847656, + "learning_rate": 5e-06, + "loss": 0.7813, + "num_input_tokens_seen": 574072236, + "step": 4545 + }, + { + "epoch": 1.1659077791316617, + "loss": 0.8294141292572021, + "loss_ce": 0.0003125669318251312, + "loss_iou": 0.3828125, + "loss_num": 0.012451171875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 574072236, + "step": 4545 + }, + { + "epoch": 1.166164304495607, + "grad_norm": 40.95685577392578, + "learning_rate": 5e-06, + "loss": 0.7915, + "num_input_tokens_seen": 574197848, + "step": 4546 + }, + { + "epoch": 1.166164304495607, + "loss": 0.7200202345848083, + "loss_ce": 0.0037116569001227617, + "loss_iou": 0.330078125, + "loss_num": 0.0113525390625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 574197848, + "step": 4546 + }, + { + "epoch": 1.1664208298595524, + "grad_norm": 44.59059143066406, + "learning_rate": 5e-06, + "loss": 0.8058, + "num_input_tokens_seen": 574323768, + "step": 4547 + }, + { + "epoch": 1.1664208298595524, + "loss": 0.8200675249099731, + "loss_ce": 0.00878816470503807, + "loss_iou": 0.37890625, + "loss_num": 0.0107421875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 574323768, + "step": 4547 + }, + { + "epoch": 1.1666773552234977, + "grad_norm": 42.14200210571289, + "learning_rate": 5e-06, + "loss": 0.8343, + "num_input_tokens_seen": 574450004, + "step": 4548 + }, + { + "epoch": 1.1666773552234977, + "loss": 0.8473623991012573, + "loss_ce": 0.00019438084564171731, + "loss_iou": 0.3984375, + "loss_num": 0.00946044921875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 574450004, + "step": 4548 + }, + { + "epoch": 1.1669338805874432, + "grad_norm": 52.06083679199219, + "learning_rate": 5e-06, + "loss": 0.8506, + "num_input_tokens_seen": 574576272, + "step": 4549 + }, + { + "epoch": 1.1669338805874432, + "loss": 0.8010478019714355, + "loss_ce": 0.0002665590145625174, + "loss_iou": 0.3828125, + "loss_num": 0.006988525390625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 574576272, + "step": 4549 + }, + { + "epoch": 1.1671904059513885, + "grad_norm": 45.91111755371094, + "learning_rate": 5e-06, + "loss": 0.9084, + "num_input_tokens_seen": 574702896, + "step": 4550 + }, + { + "epoch": 1.1671904059513885, + "loss": 0.820820152759552, + "loss_ce": 0.000995949492789805, + "loss_iou": 0.37890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 574702896, + "step": 4550 + }, + { + "epoch": 1.1674469313153337, + "grad_norm": 32.767738342285156, + "learning_rate": 5e-06, + "loss": 0.7923, + "num_input_tokens_seen": 574829968, + "step": 4551 + }, + { + "epoch": 1.1674469313153337, + "loss": 0.7263056039810181, + "loss_ce": 0.0004755000118166208, + "loss_iou": 0.34765625, + "loss_num": 0.005706787109375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 574829968, + "step": 4551 + }, + { + "epoch": 1.1677034566792792, + "grad_norm": 42.859649658203125, + "learning_rate": 5e-06, + "loss": 0.9388, + "num_input_tokens_seen": 574956216, + "step": 4552 + }, + { + "epoch": 1.1677034566792792, + "loss": 0.9670284986495972, + "loss_ce": 0.0007198589737527072, + "loss_iou": 0.43359375, + "loss_num": 0.01953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 574956216, + "step": 4552 + }, + { + "epoch": 1.1679599820432245, + "grad_norm": 46.200130462646484, + "learning_rate": 5e-06, + "loss": 0.8389, + "num_input_tokens_seen": 575083716, + "step": 4553 + }, + { + "epoch": 1.1679599820432245, + "loss": 0.8094574213027954, + "loss_ce": 0.00013122055679559708, + "loss_iou": 0.390625, + "loss_num": 0.0052490234375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 575083716, + "step": 4553 + }, + { + "epoch": 1.16821650740717, + "grad_norm": 53.43812561035156, + "learning_rate": 5e-06, + "loss": 0.8887, + "num_input_tokens_seen": 575210240, + "step": 4554 + }, + { + "epoch": 1.16821650740717, + "loss": 0.8060768842697144, + "loss_ce": 0.00041285352199338377, + "loss_iou": 0.36328125, + "loss_num": 0.015869140625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 575210240, + "step": 4554 + }, + { + "epoch": 1.1684730327711152, + "grad_norm": 57.51784133911133, + "learning_rate": 5e-06, + "loss": 0.8559, + "num_input_tokens_seen": 575336440, + "step": 4555 + }, + { + "epoch": 1.1684730327711152, + "loss": 0.7801345586776733, + "loss_ce": 0.00034936339943669736, + "loss_iou": 0.353515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 575336440, + "step": 4555 + }, + { + "epoch": 1.1687295581350605, + "grad_norm": 36.79499435424805, + "learning_rate": 5e-06, + "loss": 0.9429, + "num_input_tokens_seen": 575462568, + "step": 4556 + }, + { + "epoch": 1.1687295581350605, + "loss": 0.9999934434890747, + "loss_ce": 0.00048168140347115695, + "loss_iou": 0.453125, + "loss_num": 0.0186767578125, + "loss_xval": 1.0, + "num_input_tokens_seen": 575462568, + "step": 4556 + }, + { + "epoch": 1.168986083499006, + "grad_norm": 21.925382614135742, + "learning_rate": 5e-06, + "loss": 0.7533, + "num_input_tokens_seen": 575589196, + "step": 4557 + }, + { + "epoch": 1.168986083499006, + "loss": 0.6767134070396423, + "loss_ce": 0.00044386257650330663, + "loss_iou": 0.32421875, + "loss_num": 0.00518798828125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 575589196, + "step": 4557 + }, + { + "epoch": 1.1692426088629513, + "grad_norm": 17.59357261657715, + "learning_rate": 5e-06, + "loss": 0.9078, + "num_input_tokens_seen": 575716232, + "step": 4558 + }, + { + "epoch": 1.1692426088629513, + "loss": 0.8279498815536499, + "loss_ce": 0.0008014380000531673, + "loss_iou": 0.384765625, + "loss_num": 0.0111083984375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 575716232, + "step": 4558 + }, + { + "epoch": 1.1694991342268968, + "grad_norm": 31.002777099609375, + "learning_rate": 5e-06, + "loss": 0.8861, + "num_input_tokens_seen": 575842296, + "step": 4559 + }, + { + "epoch": 1.1694991342268968, + "loss": 0.9588915109634399, + "loss_ce": 0.0023485212586820126, + "loss_iou": 0.443359375, + "loss_num": 0.013671875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 575842296, + "step": 4559 + }, + { + "epoch": 1.169755659590842, + "grad_norm": 33.45065689086914, + "learning_rate": 5e-06, + "loss": 0.7569, + "num_input_tokens_seen": 575967748, + "step": 4560 + }, + { + "epoch": 1.169755659590842, + "loss": 0.7655437588691711, + "loss_ce": 0.0048016030341386795, + "loss_iou": 0.34375, + "loss_num": 0.01446533203125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 575967748, + "step": 4560 + }, + { + "epoch": 1.1700121849547873, + "grad_norm": 54.65034866333008, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 576094476, + "step": 4561 + }, + { + "epoch": 1.1700121849547873, + "loss": 1.0133445262908936, + "loss_ce": 0.00309053435921669, + "loss_iou": 0.466796875, + "loss_num": 0.0155029296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 576094476, + "step": 4561 + }, + { + "epoch": 1.1702687103187328, + "grad_norm": 51.731483459472656, + "learning_rate": 5e-06, + "loss": 0.9526, + "num_input_tokens_seen": 576221036, + "step": 4562 + }, + { + "epoch": 1.1702687103187328, + "loss": 0.9475976824760437, + "loss_ce": 0.0013086418621242046, + "loss_iou": 0.431640625, + "loss_num": 0.01611328125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 576221036, + "step": 4562 + }, + { + "epoch": 1.170525235682678, + "grad_norm": 35.754478454589844, + "learning_rate": 5e-06, + "loss": 0.909, + "num_input_tokens_seen": 576347696, + "step": 4563 + }, + { + "epoch": 1.170525235682678, + "loss": 0.8464227914810181, + "loss_ce": 0.005114208906888962, + "loss_iou": 0.390625, + "loss_num": 0.012451171875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 576347696, + "step": 4563 + }, + { + "epoch": 1.1707817610466236, + "grad_norm": 43.96115493774414, + "learning_rate": 5e-06, + "loss": 0.9723, + "num_input_tokens_seen": 576474332, + "step": 4564 + }, + { + "epoch": 1.1707817610466236, + "loss": 1.0251595973968506, + "loss_ce": 0.0002572696830611676, + "loss_iou": 0.470703125, + "loss_num": 0.0164794921875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 576474332, + "step": 4564 + }, + { + "epoch": 1.1710382864105688, + "grad_norm": 49.177467346191406, + "learning_rate": 5e-06, + "loss": 0.921, + "num_input_tokens_seen": 576600856, + "step": 4565 + }, + { + "epoch": 1.1710382864105688, + "loss": 0.9086631536483765, + "loss_ce": 0.0004599944222718477, + "loss_iou": 0.41796875, + "loss_num": 0.01416015625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 576600856, + "step": 4565 + }, + { + "epoch": 1.171294811774514, + "grad_norm": 33.261417388916016, + "learning_rate": 5e-06, + "loss": 0.8403, + "num_input_tokens_seen": 576725860, + "step": 4566 + }, + { + "epoch": 1.171294811774514, + "loss": 0.7302703857421875, + "loss_ce": 0.000778151792474091, + "loss_iou": 0.341796875, + "loss_num": 0.0091552734375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 576725860, + "step": 4566 + }, + { + "epoch": 1.1715513371384596, + "grad_norm": 34.12145233154297, + "learning_rate": 5e-06, + "loss": 0.9388, + "num_input_tokens_seen": 576852060, + "step": 4567 + }, + { + "epoch": 1.1715513371384596, + "loss": 0.9952550530433655, + "loss_ce": 0.0008702880586497486, + "loss_iou": 0.45703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 576852060, + "step": 4567 + }, + { + "epoch": 1.1718078625024049, + "grad_norm": 34.77125549316406, + "learning_rate": 5e-06, + "loss": 0.8991, + "num_input_tokens_seen": 576977584, + "step": 4568 + }, + { + "epoch": 1.1718078625024049, + "loss": 0.806506872177124, + "loss_ce": 0.00108696601819247, + "loss_iou": 0.37109375, + "loss_num": 0.01287841796875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 576977584, + "step": 4568 + }, + { + "epoch": 1.1720643878663504, + "grad_norm": 29.107471466064453, + "learning_rate": 5e-06, + "loss": 0.802, + "num_input_tokens_seen": 577105088, + "step": 4569 + }, + { + "epoch": 1.1720643878663504, + "loss": 0.7420886158943176, + "loss_ce": 0.0011218349682167172, + "loss_iou": 0.349609375, + "loss_num": 0.00860595703125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 577105088, + "step": 4569 + }, + { + "epoch": 1.1723209132302956, + "grad_norm": 32.19477844238281, + "learning_rate": 5e-06, + "loss": 0.8479, + "num_input_tokens_seen": 577231752, + "step": 4570 + }, + { + "epoch": 1.1723209132302956, + "loss": 0.6239475011825562, + "loss_ce": 0.0009006330510601401, + "loss_iou": 0.294921875, + "loss_num": 0.006561279296875, + "loss_xval": 0.625, + "num_input_tokens_seen": 577231752, + "step": 4570 + }, + { + "epoch": 1.172577438594241, + "grad_norm": 47.64165115356445, + "learning_rate": 5e-06, + "loss": 0.8753, + "num_input_tokens_seen": 577358380, + "step": 4571 + }, + { + "epoch": 1.172577438594241, + "loss": 0.9160193800926208, + "loss_ce": 0.0009803138673305511, + "loss_iou": 0.40625, + "loss_num": 0.020751953125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 577358380, + "step": 4571 + }, + { + "epoch": 1.1728339639581864, + "grad_norm": 44.90454864501953, + "learning_rate": 5e-06, + "loss": 0.8594, + "num_input_tokens_seen": 577484748, + "step": 4572 + }, + { + "epoch": 1.1728339639581864, + "loss": 0.8819500803947449, + "loss_ce": 0.0025555454194545746, + "loss_iou": 0.41015625, + "loss_num": 0.01153564453125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 577484748, + "step": 4572 + }, + { + "epoch": 1.1730904893221317, + "grad_norm": 42.69655990600586, + "learning_rate": 5e-06, + "loss": 0.7585, + "num_input_tokens_seen": 577611792, + "step": 4573 + }, + { + "epoch": 1.1730904893221317, + "loss": 0.8984812498092651, + "loss_ce": 0.001020363182760775, + "loss_iou": 0.40625, + "loss_num": 0.0167236328125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 577611792, + "step": 4573 + }, + { + "epoch": 1.1733470146860772, + "grad_norm": 44.36417770385742, + "learning_rate": 5e-06, + "loss": 0.7653, + "num_input_tokens_seen": 577738892, + "step": 4574 + }, + { + "epoch": 1.1733470146860772, + "loss": 0.788877010345459, + "loss_ce": 0.0007910501444712281, + "loss_iou": 0.3671875, + "loss_num": 0.01055908203125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 577738892, + "step": 4574 + }, + { + "epoch": 1.1736035400500224, + "grad_norm": 41.73553466796875, + "learning_rate": 5e-06, + "loss": 0.782, + "num_input_tokens_seen": 577864348, + "step": 4575 + }, + { + "epoch": 1.1736035400500224, + "loss": 0.9593762159347534, + "loss_ce": 0.003809826448559761, + "loss_iou": 0.43359375, + "loss_num": 0.017822265625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 577864348, + "step": 4575 + }, + { + "epoch": 1.173860065413968, + "grad_norm": 54.07448196411133, + "learning_rate": 5e-06, + "loss": 0.9085, + "num_input_tokens_seen": 577990676, + "step": 4576 + }, + { + "epoch": 1.173860065413968, + "loss": 0.9043574929237366, + "loss_ce": 0.003966839052736759, + "loss_iou": 0.408203125, + "loss_num": 0.016845703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 577990676, + "step": 4576 + }, + { + "epoch": 1.1741165907779132, + "grad_norm": 67.6137466430664, + "learning_rate": 5e-06, + "loss": 0.8764, + "num_input_tokens_seen": 578118024, + "step": 4577 + }, + { + "epoch": 1.1741165907779132, + "loss": 0.8529617190361023, + "loss_ce": 0.0018875104142352939, + "loss_iou": 0.40234375, + "loss_num": 0.00933837890625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 578118024, + "step": 4577 + }, + { + "epoch": 1.1743731161418585, + "grad_norm": 46.12276840209961, + "learning_rate": 5e-06, + "loss": 0.9218, + "num_input_tokens_seen": 578243884, + "step": 4578 + }, + { + "epoch": 1.1743731161418585, + "loss": 0.9537265300750732, + "loss_ce": 0.00011331253335811198, + "loss_iou": 0.431640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 578243884, + "step": 4578 + }, + { + "epoch": 1.174629641505804, + "grad_norm": 20.67555809020996, + "learning_rate": 5e-06, + "loss": 0.8074, + "num_input_tokens_seen": 578370012, + "step": 4579 + }, + { + "epoch": 1.174629641505804, + "loss": 0.7617918252944946, + "loss_ce": 7.310444198083133e-05, + "loss_iou": 0.3515625, + "loss_num": 0.01190185546875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 578370012, + "step": 4579 + }, + { + "epoch": 1.1748861668697492, + "grad_norm": 21.953819274902344, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 578495492, + "step": 4580 + }, + { + "epoch": 1.1748861668697492, + "loss": 0.8596168756484985, + "loss_ce": 0.0002418872609268874, + "loss_iou": 0.40625, + "loss_num": 0.0093994140625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 578495492, + "step": 4580 + }, + { + "epoch": 1.1751426922336945, + "grad_norm": 40.53633117675781, + "learning_rate": 5e-06, + "loss": 0.8835, + "num_input_tokens_seen": 578622524, + "step": 4581 + }, + { + "epoch": 1.1751426922336945, + "loss": 1.051048755645752, + "loss_ce": 0.0017324090003967285, + "loss_iou": 0.470703125, + "loss_num": 0.021728515625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 578622524, + "step": 4581 + }, + { + "epoch": 1.17539921759764, + "grad_norm": 30.6467227935791, + "learning_rate": 5e-06, + "loss": 0.8191, + "num_input_tokens_seen": 578749152, + "step": 4582 + }, + { + "epoch": 1.17539921759764, + "loss": 0.811220645904541, + "loss_ce": 0.003359347814694047, + "loss_iou": 0.380859375, + "loss_num": 0.00927734375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 578749152, + "step": 4582 + }, + { + "epoch": 1.1756557429615853, + "grad_norm": 45.24897384643555, + "learning_rate": 5e-06, + "loss": 0.8285, + "num_input_tokens_seen": 578875744, + "step": 4583 + }, + { + "epoch": 1.1756557429615853, + "loss": 0.7563344240188599, + "loss_ce": 0.0009633263107389212, + "loss_iou": 0.3515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 578875744, + "step": 4583 + }, + { + "epoch": 1.1759122683255308, + "grad_norm": 37.89247131347656, + "learning_rate": 5e-06, + "loss": 0.894, + "num_input_tokens_seen": 579000688, + "step": 4584 + }, + { + "epoch": 1.1759122683255308, + "loss": 0.8557764291763306, + "loss_ce": 0.007631906308233738, + "loss_iou": 0.40234375, + "loss_num": 0.00836181640625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 579000688, + "step": 4584 + }, + { + "epoch": 1.176168793689476, + "grad_norm": 36.54160690307617, + "learning_rate": 5e-06, + "loss": 0.7979, + "num_input_tokens_seen": 579126688, + "step": 4585 + }, + { + "epoch": 1.176168793689476, + "loss": 0.843592643737793, + "loss_ce": 0.0005750389536842704, + "loss_iou": 0.384765625, + "loss_num": 0.014892578125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 579126688, + "step": 4585 + }, + { + "epoch": 1.1764253190534215, + "grad_norm": 40.365177154541016, + "learning_rate": 5e-06, + "loss": 0.7892, + "num_input_tokens_seen": 579253388, + "step": 4586 + }, + { + "epoch": 1.1764253190534215, + "loss": 0.7280598282814026, + "loss_ce": 0.00027660897467285395, + "loss_iou": 0.34765625, + "loss_num": 0.00653076171875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 579253388, + "step": 4586 + }, + { + "epoch": 1.1766818444173668, + "grad_norm": 55.537620544433594, + "learning_rate": 5e-06, + "loss": 0.8995, + "num_input_tokens_seen": 579380672, + "step": 4587 + }, + { + "epoch": 1.1766818444173668, + "loss": 0.7508025169372559, + "loss_ce": 0.0015349689638242126, + "loss_iou": 0.34765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.75, + "num_input_tokens_seen": 579380672, + "step": 4587 + }, + { + "epoch": 1.176938369781312, + "grad_norm": 50.71245193481445, + "learning_rate": 5e-06, + "loss": 0.8281, + "num_input_tokens_seen": 579507812, + "step": 4588 + }, + { + "epoch": 1.176938369781312, + "loss": 0.7760930061340332, + "loss_ce": 0.0007023385260254145, + "loss_iou": 0.369140625, + "loss_num": 0.007049560546875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 579507812, + "step": 4588 + }, + { + "epoch": 1.1771948951452575, + "grad_norm": 45.852439880371094, + "learning_rate": 5e-06, + "loss": 0.8337, + "num_input_tokens_seen": 579634284, + "step": 4589 + }, + { + "epoch": 1.1771948951452575, + "loss": 0.8098774552345276, + "loss_ce": 0.0017719878815114498, + "loss_iou": 0.365234375, + "loss_num": 0.01556396484375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 579634284, + "step": 4589 + }, + { + "epoch": 1.1774514205092028, + "grad_norm": 47.360347747802734, + "learning_rate": 5e-06, + "loss": 0.9107, + "num_input_tokens_seen": 579760808, + "step": 4590 + }, + { + "epoch": 1.1774514205092028, + "loss": 0.8430802226066589, + "loss_ce": 0.001283317687921226, + "loss_iou": 0.404296875, + "loss_num": 0.0064697265625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 579760808, + "step": 4590 + }, + { + "epoch": 1.177707945873148, + "grad_norm": 52.954647064208984, + "learning_rate": 5e-06, + "loss": 0.8496, + "num_input_tokens_seen": 579885896, + "step": 4591 + }, + { + "epoch": 1.177707945873148, + "loss": 0.7786237001419067, + "loss_ce": 0.0005475407233461738, + "loss_iou": 0.359375, + "loss_num": 0.01171875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 579885896, + "step": 4591 + }, + { + "epoch": 1.1779644712370936, + "grad_norm": 33.68499755859375, + "learning_rate": 5e-06, + "loss": 0.8974, + "num_input_tokens_seen": 580011784, + "step": 4592 + }, + { + "epoch": 1.1779644712370936, + "loss": 0.8921992778778076, + "loss_ce": 0.0008418389479629695, + "loss_iou": 0.404296875, + "loss_num": 0.016357421875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 580011784, + "step": 4592 + }, + { + "epoch": 1.1782209966010389, + "grad_norm": 20.487777709960938, + "learning_rate": 5e-06, + "loss": 0.8923, + "num_input_tokens_seen": 580137836, + "step": 4593 + }, + { + "epoch": 1.1782209966010389, + "loss": 1.0280570983886719, + "loss_ce": 0.000469218532089144, + "loss_iou": 0.46484375, + "loss_num": 0.0196533203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 580137836, + "step": 4593 + }, + { + "epoch": 1.1784775219649843, + "grad_norm": 42.857479095458984, + "learning_rate": 5e-06, + "loss": 0.8056, + "num_input_tokens_seen": 580263624, + "step": 4594 + }, + { + "epoch": 1.1784775219649843, + "loss": 0.7145461440086365, + "loss_ce": 0.001411376055330038, + "loss_iou": 0.341796875, + "loss_num": 0.006072998046875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 580263624, + "step": 4594 + }, + { + "epoch": 1.1787340473289296, + "grad_norm": 42.474464416503906, + "learning_rate": 5e-06, + "loss": 0.8184, + "num_input_tokens_seen": 580389184, + "step": 4595 + }, + { + "epoch": 1.1787340473289296, + "loss": 0.8914209604263306, + "loss_ce": 0.00030769355362281203, + "loss_iou": 0.412109375, + "loss_num": 0.01324462890625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 580389184, + "step": 4595 + }, + { + "epoch": 1.178990572692875, + "grad_norm": 60.3443489074707, + "learning_rate": 5e-06, + "loss": 0.7957, + "num_input_tokens_seen": 580515988, + "step": 4596 + }, + { + "epoch": 1.178990572692875, + "loss": 0.8500913381576538, + "loss_ce": 0.0019468243699520826, + "loss_iou": 0.400390625, + "loss_num": 0.0096435546875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 580515988, + "step": 4596 + }, + { + "epoch": 1.1792470980568204, + "grad_norm": 47.67316436767578, + "learning_rate": 5e-06, + "loss": 0.8917, + "num_input_tokens_seen": 580643564, + "step": 4597 + }, + { + "epoch": 1.1792470980568204, + "loss": 0.7808536291122437, + "loss_ce": 9.19197773328051e-05, + "loss_iou": 0.3671875, + "loss_num": 0.00927734375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 580643564, + "step": 4597 + }, + { + "epoch": 1.1795036234207656, + "grad_norm": 32.87125015258789, + "learning_rate": 5e-06, + "loss": 0.7381, + "num_input_tokens_seen": 580769108, + "step": 4598 + }, + { + "epoch": 1.1795036234207656, + "loss": 0.8275707960128784, + "loss_ce": 0.0013989085564389825, + "loss_iou": 0.380859375, + "loss_num": 0.01275634765625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 580769108, + "step": 4598 + }, + { + "epoch": 1.1797601487847111, + "grad_norm": 32.77552032470703, + "learning_rate": 5e-06, + "loss": 0.7536, + "num_input_tokens_seen": 580895112, + "step": 4599 + }, + { + "epoch": 1.1797601487847111, + "loss": 0.6803081035614014, + "loss_ce": 0.0006205724785104394, + "loss_iou": 0.328125, + "loss_num": 0.004547119140625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 580895112, + "step": 4599 + }, + { + "epoch": 1.1800166741486564, + "grad_norm": 40.71432876586914, + "learning_rate": 5e-06, + "loss": 0.8341, + "num_input_tokens_seen": 581020656, + "step": 4600 + }, + { + "epoch": 1.1800166741486564, + "loss": 0.8069639205932617, + "loss_ce": 0.0012998477322980762, + "loss_iou": 0.37890625, + "loss_num": 0.010009765625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 581020656, + "step": 4600 + }, + { + "epoch": 1.180273199512602, + "grad_norm": 45.228065490722656, + "learning_rate": 5e-06, + "loss": 0.7992, + "num_input_tokens_seen": 581147680, + "step": 4601 + }, + { + "epoch": 1.180273199512602, + "loss": 0.9379592537879944, + "loss_ce": 0.0009475365513935685, + "loss_iou": 0.431640625, + "loss_num": 0.01495361328125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 581147680, + "step": 4601 + }, + { + "epoch": 1.1805297248765472, + "grad_norm": 59.00119400024414, + "learning_rate": 5e-06, + "loss": 0.8358, + "num_input_tokens_seen": 581275000, + "step": 4602 + }, + { + "epoch": 1.1805297248765472, + "loss": 1.0336050987243652, + "loss_ce": 0.0018668161937966943, + "loss_iou": 0.466796875, + "loss_num": 0.0201416015625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 581275000, + "step": 4602 + }, + { + "epoch": 1.1807862502404924, + "grad_norm": 52.54079055786133, + "learning_rate": 5e-06, + "loss": 0.806, + "num_input_tokens_seen": 581400828, + "step": 4603 + }, + { + "epoch": 1.1807862502404924, + "loss": 0.6920989155769348, + "loss_ce": 0.0011809266870841384, + "loss_iou": 0.32421875, + "loss_num": 0.0081787109375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 581400828, + "step": 4603 + }, + { + "epoch": 1.181042775604438, + "grad_norm": 30.077484130859375, + "learning_rate": 5e-06, + "loss": 0.8948, + "num_input_tokens_seen": 581526864, + "step": 4604 + }, + { + "epoch": 1.181042775604438, + "loss": 0.7204556465148926, + "loss_ce": 0.0031705095898360014, + "loss_iou": 0.330078125, + "loss_num": 0.010986328125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 581526864, + "step": 4604 + }, + { + "epoch": 1.1812993009683832, + "grad_norm": 30.17238426208496, + "learning_rate": 5e-06, + "loss": 0.7961, + "num_input_tokens_seen": 581653804, + "step": 4605 + }, + { + "epoch": 1.1812993009683832, + "loss": 0.7269279956817627, + "loss_ce": 0.00012132945994380862, + "loss_iou": 0.34765625, + "loss_num": 0.00634765625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 581653804, + "step": 4605 + }, + { + "epoch": 1.1815558263323287, + "grad_norm": 36.594268798828125, + "learning_rate": 5e-06, + "loss": 0.8876, + "num_input_tokens_seen": 581779988, + "step": 4606 + }, + { + "epoch": 1.1815558263323287, + "loss": 0.9021351337432861, + "loss_ce": 0.002721048891544342, + "loss_iou": 0.4140625, + "loss_num": 0.0145263671875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 581779988, + "step": 4606 + }, + { + "epoch": 1.181812351696274, + "grad_norm": 32.78271484375, + "learning_rate": 5e-06, + "loss": 0.8611, + "num_input_tokens_seen": 581905292, + "step": 4607 + }, + { + "epoch": 1.181812351696274, + "loss": 0.8906292915344238, + "loss_ce": 0.0019573902245610952, + "loss_iou": 0.41796875, + "loss_num": 0.01043701171875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 581905292, + "step": 4607 + }, + { + "epoch": 1.1820688770602192, + "grad_norm": 27.876537322998047, + "learning_rate": 5e-06, + "loss": 0.8281, + "num_input_tokens_seen": 582031600, + "step": 4608 + }, + { + "epoch": 1.1820688770602192, + "loss": 0.772733211517334, + "loss_ce": 0.00027225870871916413, + "loss_iou": 0.359375, + "loss_num": 0.0108642578125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 582031600, + "step": 4608 + }, + { + "epoch": 1.1823254024241647, + "grad_norm": 30.16394805908203, + "learning_rate": 5e-06, + "loss": 0.7993, + "num_input_tokens_seen": 582158456, + "step": 4609 + }, + { + "epoch": 1.1823254024241647, + "loss": 0.7312488555908203, + "loss_ce": 0.003709780750796199, + "loss_iou": 0.34765625, + "loss_num": 0.006256103515625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 582158456, + "step": 4609 + }, + { + "epoch": 1.18258192778811, + "grad_norm": 33.31243133544922, + "learning_rate": 5e-06, + "loss": 0.9087, + "num_input_tokens_seen": 582283644, + "step": 4610 + }, + { + "epoch": 1.18258192778811, + "loss": 0.6896200180053711, + "loss_ce": 0.0016317334957420826, + "loss_iou": 0.326171875, + "loss_num": 0.007080078125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 582283644, + "step": 4610 + }, + { + "epoch": 1.1828384531520555, + "grad_norm": 37.25088882446289, + "learning_rate": 5e-06, + "loss": 0.8791, + "num_input_tokens_seen": 582410640, + "step": 4611 + }, + { + "epoch": 1.1828384531520555, + "loss": 0.7652536630630493, + "loss_ce": 0.00011700352479238063, + "loss_iou": 0.359375, + "loss_num": 0.0091552734375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 582410640, + "step": 4611 + }, + { + "epoch": 1.1830949785160008, + "grad_norm": 61.7139892578125, + "learning_rate": 5e-06, + "loss": 0.8195, + "num_input_tokens_seen": 582537020, + "step": 4612 + }, + { + "epoch": 1.1830949785160008, + "loss": 0.9085956811904907, + "loss_ce": 0.0028339552227407694, + "loss_iou": 0.390625, + "loss_num": 0.0245361328125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 582537020, + "step": 4612 + }, + { + "epoch": 1.1833515038799463, + "grad_norm": 41.286529541015625, + "learning_rate": 5e-06, + "loss": 0.923, + "num_input_tokens_seen": 582663688, + "step": 4613 + }, + { + "epoch": 1.1833515038799463, + "loss": 0.9782660007476807, + "loss_ce": 0.002191841369494796, + "loss_iou": 0.45703125, + "loss_num": 0.0125732421875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 582663688, + "step": 4613 + }, + { + "epoch": 1.1836080292438915, + "grad_norm": 40.64524841308594, + "learning_rate": 5e-06, + "loss": 0.8724, + "num_input_tokens_seen": 582790044, + "step": 4614 + }, + { + "epoch": 1.1836080292438915, + "loss": 0.832642674446106, + "loss_ce": 0.0001231193746207282, + "loss_iou": 0.376953125, + "loss_num": 0.0155029296875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 582790044, + "step": 4614 + }, + { + "epoch": 1.1838645546078368, + "grad_norm": 34.41304397583008, + "learning_rate": 5e-06, + "loss": 0.7558, + "num_input_tokens_seen": 582916008, + "step": 4615 + }, + { + "epoch": 1.1838645546078368, + "loss": 0.7711011171340942, + "loss_ce": 0.00010501577344257385, + "loss_iou": 0.37109375, + "loss_num": 0.0059814453125, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 582916008, + "step": 4615 + }, + { + "epoch": 1.1841210799717823, + "grad_norm": 35.31270980834961, + "learning_rate": 5e-06, + "loss": 0.9353, + "num_input_tokens_seen": 583043020, + "step": 4616 + }, + { + "epoch": 1.1841210799717823, + "loss": 0.93311607837677, + "loss_ce": 0.0004988645669072866, + "loss_iou": 0.42578125, + "loss_num": 0.0164794921875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 583043020, + "step": 4616 + }, + { + "epoch": 1.1843776053357276, + "grad_norm": 33.067588806152344, + "learning_rate": 5e-06, + "loss": 0.9322, + "num_input_tokens_seen": 583169340, + "step": 4617 + }, + { + "epoch": 1.1843776053357276, + "loss": 1.1281083822250366, + "loss_ce": 0.0031083454377949238, + "loss_iou": 0.49609375, + "loss_num": 0.0264892578125, + "loss_xval": 1.125, + "num_input_tokens_seen": 583169340, + "step": 4617 + }, + { + "epoch": 1.1846341306996728, + "grad_norm": 30.148147583007812, + "learning_rate": 5e-06, + "loss": 0.7761, + "num_input_tokens_seen": 583297076, + "step": 4618 + }, + { + "epoch": 1.1846341306996728, + "loss": 0.6387446522712708, + "loss_ce": 0.00031693995697423816, + "loss_iou": 0.30859375, + "loss_num": 0.0040283203125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 583297076, + "step": 4618 + }, + { + "epoch": 1.1848906560636183, + "grad_norm": 51.00368118286133, + "learning_rate": 5e-06, + "loss": 0.783, + "num_input_tokens_seen": 583423480, + "step": 4619 + }, + { + "epoch": 1.1848906560636183, + "loss": 0.7757353782653809, + "loss_ce": 0.00010061202920041978, + "loss_iou": 0.359375, + "loss_num": 0.01141357421875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 583423480, + "step": 4619 + }, + { + "epoch": 1.1851471814275636, + "grad_norm": 48.44395065307617, + "learning_rate": 5e-06, + "loss": 0.9544, + "num_input_tokens_seen": 583548696, + "step": 4620 + }, + { + "epoch": 1.1851471814275636, + "loss": 0.9057371616363525, + "loss_ce": 0.006811385042965412, + "loss_iou": 0.4140625, + "loss_num": 0.01397705078125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 583548696, + "step": 4620 + }, + { + "epoch": 1.185403706791509, + "grad_norm": 32.206302642822266, + "learning_rate": 5e-06, + "loss": 0.8646, + "num_input_tokens_seen": 583674944, + "step": 4621 + }, + { + "epoch": 1.185403706791509, + "loss": 0.8563874959945679, + "loss_ce": 0.0004304995818529278, + "loss_iou": 0.3984375, + "loss_num": 0.0113525390625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 583674944, + "step": 4621 + }, + { + "epoch": 1.1856602321554544, + "grad_norm": 43.31064224243164, + "learning_rate": 5e-06, + "loss": 0.8818, + "num_input_tokens_seen": 583801380, + "step": 4622 + }, + { + "epoch": 1.1856602321554544, + "loss": 0.8135311603546143, + "loss_ce": 0.0005429055308923125, + "loss_iou": 0.384765625, + "loss_num": 0.0087890625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 583801380, + "step": 4622 + }, + { + "epoch": 1.1859167575193998, + "grad_norm": 54.82998275756836, + "learning_rate": 5e-06, + "loss": 0.8837, + "num_input_tokens_seen": 583928528, + "step": 4623 + }, + { + "epoch": 1.1859167575193998, + "loss": 0.9326336979866028, + "loss_ce": 0.00026066412101499736, + "loss_iou": 0.439453125, + "loss_num": 0.0107421875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 583928528, + "step": 4623 + }, + { + "epoch": 1.1861732828833451, + "grad_norm": 37.38352584838867, + "learning_rate": 5e-06, + "loss": 0.8573, + "num_input_tokens_seen": 584054056, + "step": 4624 + }, + { + "epoch": 1.1861732828833451, + "loss": 0.9269771575927734, + "loss_ce": 0.0031490386463701725, + "loss_iou": 0.427734375, + "loss_num": 0.013671875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 584054056, + "step": 4624 + }, + { + "epoch": 1.1864298082472904, + "grad_norm": 19.562702178955078, + "learning_rate": 5e-06, + "loss": 0.7126, + "num_input_tokens_seen": 584180084, + "step": 4625 + }, + { + "epoch": 1.1864298082472904, + "loss": 0.8939405083656311, + "loss_ce": 0.003803753526881337, + "loss_iou": 0.4140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 584180084, + "step": 4625 + }, + { + "epoch": 1.1866863336112359, + "grad_norm": 21.646366119384766, + "learning_rate": 5e-06, + "loss": 0.8128, + "num_input_tokens_seen": 584306440, + "step": 4626 + }, + { + "epoch": 1.1866863336112359, + "loss": 0.7637436985969543, + "loss_ce": 7.181673572631553e-05, + "loss_iou": 0.357421875, + "loss_num": 0.01007080078125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 584306440, + "step": 4626 + }, + { + "epoch": 1.1869428589751811, + "grad_norm": 180.05006408691406, + "learning_rate": 5e-06, + "loss": 0.8308, + "num_input_tokens_seen": 584433496, + "step": 4627 + }, + { + "epoch": 1.1869428589751811, + "loss": 0.7550105452537537, + "loss_ce": 0.000616027566138655, + "loss_iou": 0.357421875, + "loss_num": 0.0081787109375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 584433496, + "step": 4627 + }, + { + "epoch": 1.1871993843391264, + "grad_norm": 25.550289154052734, + "learning_rate": 5e-06, + "loss": 0.8847, + "num_input_tokens_seen": 584558644, + "step": 4628 + }, + { + "epoch": 1.1871993843391264, + "loss": 0.7627483606338501, + "loss_ce": 0.0034710506442934275, + "loss_iou": 0.35546875, + "loss_num": 0.00994873046875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 584558644, + "step": 4628 + }, + { + "epoch": 1.187455909703072, + "grad_norm": 20.698692321777344, + "learning_rate": 5e-06, + "loss": 0.8437, + "num_input_tokens_seen": 584684228, + "step": 4629 + }, + { + "epoch": 1.187455909703072, + "loss": 0.8831948041915894, + "loss_ce": 0.0020912904292345047, + "loss_iou": 0.404296875, + "loss_num": 0.014404296875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 584684228, + "step": 4629 + }, + { + "epoch": 1.1877124350670172, + "grad_norm": 19.390216827392578, + "learning_rate": 5e-06, + "loss": 0.9155, + "num_input_tokens_seen": 584810148, + "step": 4630 + }, + { + "epoch": 1.1877124350670172, + "loss": 0.7660215497016907, + "loss_ce": 0.0001524364051874727, + "loss_iou": 0.357421875, + "loss_num": 0.010498046875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 584810148, + "step": 4630 + }, + { + "epoch": 1.1879689604309627, + "grad_norm": 27.87102699279785, + "learning_rate": 5e-06, + "loss": 0.7749, + "num_input_tokens_seen": 584936832, + "step": 4631 + }, + { + "epoch": 1.1879689604309627, + "loss": 0.7608210444450378, + "loss_ce": 0.0005671288236044347, + "loss_iou": 0.357421875, + "loss_num": 0.009033203125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 584936832, + "step": 4631 + }, + { + "epoch": 1.188225485794908, + "grad_norm": 37.91584396362305, + "learning_rate": 5e-06, + "loss": 0.8992, + "num_input_tokens_seen": 585062680, + "step": 4632 + }, + { + "epoch": 1.188225485794908, + "loss": 0.9365710020065308, + "loss_ce": 0.0007800552411936224, + "loss_iou": 0.41796875, + "loss_num": 0.019775390625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 585062680, + "step": 4632 + }, + { + "epoch": 1.1884820111588534, + "grad_norm": 29.9906005859375, + "learning_rate": 5e-06, + "loss": 0.8634, + "num_input_tokens_seen": 585188408, + "step": 4633 + }, + { + "epoch": 1.1884820111588534, + "loss": 0.8817934393882751, + "loss_ce": 0.004352046176791191, + "loss_iou": 0.39453125, + "loss_num": 0.017822265625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 585188408, + "step": 4633 + }, + { + "epoch": 1.1887385365227987, + "grad_norm": 23.26939582824707, + "learning_rate": 5e-06, + "loss": 0.8781, + "num_input_tokens_seen": 585314268, + "step": 4634 + }, + { + "epoch": 1.1887385365227987, + "loss": 0.8205181360244751, + "loss_ce": 0.0006939330487512052, + "loss_iou": 0.388671875, + "loss_num": 0.00885009765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 585314268, + "step": 4634 + }, + { + "epoch": 1.188995061886744, + "grad_norm": 20.155792236328125, + "learning_rate": 5e-06, + "loss": 0.915, + "num_input_tokens_seen": 585440676, + "step": 4635 + }, + { + "epoch": 1.188995061886744, + "loss": 0.8193739652633667, + "loss_ce": 0.003211826318874955, + "loss_iou": 0.380859375, + "loss_num": 0.0108642578125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 585440676, + "step": 4635 + }, + { + "epoch": 1.1892515872506895, + "grad_norm": 46.44293975830078, + "learning_rate": 5e-06, + "loss": 0.8219, + "num_input_tokens_seen": 585567300, + "step": 4636 + }, + { + "epoch": 1.1892515872506895, + "loss": 0.858420193195343, + "loss_ce": 0.001486591063439846, + "loss_iou": 0.3984375, + "loss_num": 0.011962890625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 585567300, + "step": 4636 + }, + { + "epoch": 1.1895081126146347, + "grad_norm": 43.82530212402344, + "learning_rate": 5e-06, + "loss": 0.9696, + "num_input_tokens_seen": 585693812, + "step": 4637 + }, + { + "epoch": 1.1895081126146347, + "loss": 0.8724066019058228, + "loss_ce": 0.0018010998610407114, + "loss_iou": 0.40625, + "loss_num": 0.0115966796875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 585693812, + "step": 4637 + }, + { + "epoch": 1.18976463797858, + "grad_norm": 40.798789978027344, + "learning_rate": 5e-06, + "loss": 0.7907, + "num_input_tokens_seen": 585821420, + "step": 4638 + }, + { + "epoch": 1.18976463797858, + "loss": 0.8947895765304565, + "loss_ce": 0.0012349168537184596, + "loss_iou": 0.41796875, + "loss_num": 0.01141357421875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 585821420, + "step": 4638 + }, + { + "epoch": 1.1900211633425255, + "grad_norm": 28.10952377319336, + "learning_rate": 5e-06, + "loss": 0.8183, + "num_input_tokens_seen": 585946748, + "step": 4639 + }, + { + "epoch": 1.1900211633425255, + "loss": 0.9362611770629883, + "loss_ce": 0.00022602431999985129, + "loss_iou": 0.423828125, + "loss_num": 0.017822265625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 585946748, + "step": 4639 + }, + { + "epoch": 1.1902776887064708, + "grad_norm": 28.255464553833008, + "learning_rate": 5e-06, + "loss": 0.717, + "num_input_tokens_seen": 586073576, + "step": 4640 + }, + { + "epoch": 1.1902776887064708, + "loss": 0.7875838279724121, + "loss_ce": 0.0007185916765592992, + "loss_iou": 0.375, + "loss_num": 0.007354736328125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 586073576, + "step": 4640 + }, + { + "epoch": 1.1905342140704163, + "grad_norm": 35.1292724609375, + "learning_rate": 5e-06, + "loss": 0.786, + "num_input_tokens_seen": 586199916, + "step": 4641 + }, + { + "epoch": 1.1905342140704163, + "loss": 0.7188585996627808, + "loss_ce": 0.0003527055087033659, + "loss_iou": 0.34375, + "loss_num": 0.00616455078125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 586199916, + "step": 4641 + }, + { + "epoch": 1.1907907394343615, + "grad_norm": 37.372161865234375, + "learning_rate": 5e-06, + "loss": 0.8346, + "num_input_tokens_seen": 586325908, + "step": 4642 + }, + { + "epoch": 1.1907907394343615, + "loss": 0.9119275808334351, + "loss_ce": 0.0029919964727014303, + "loss_iou": 0.40234375, + "loss_num": 0.020751953125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 586325908, + "step": 4642 + }, + { + "epoch": 1.191047264798307, + "grad_norm": 42.02035140991211, + "learning_rate": 5e-06, + "loss": 0.9261, + "num_input_tokens_seen": 586451756, + "step": 4643 + }, + { + "epoch": 1.191047264798307, + "loss": 0.9088037610054016, + "loss_ce": 0.003042031079530716, + "loss_iou": 0.423828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 586451756, + "step": 4643 + }, + { + "epoch": 1.1913037901622523, + "grad_norm": 44.763221740722656, + "learning_rate": 5e-06, + "loss": 0.8615, + "num_input_tokens_seen": 586577904, + "step": 4644 + }, + { + "epoch": 1.1913037901622523, + "loss": 1.0128493309020996, + "loss_ce": 0.0016188411973416805, + "loss_iou": 0.45703125, + "loss_num": 0.0191650390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 586577904, + "step": 4644 + }, + { + "epoch": 1.1915603155261976, + "grad_norm": 47.281131744384766, + "learning_rate": 5e-06, + "loss": 0.9077, + "num_input_tokens_seen": 586703092, + "step": 4645 + }, + { + "epoch": 1.1915603155261976, + "loss": 0.9772860407829285, + "loss_ce": 0.0007235530647449195, + "loss_iou": 0.451171875, + "loss_num": 0.01507568359375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 586703092, + "step": 4645 + }, + { + "epoch": 1.191816840890143, + "grad_norm": 62.982173919677734, + "learning_rate": 5e-06, + "loss": 0.8827, + "num_input_tokens_seen": 586829540, + "step": 4646 + }, + { + "epoch": 1.191816840890143, + "loss": 0.7612956166267395, + "loss_ce": 0.00201825937256217, + "loss_iou": 0.361328125, + "loss_num": 0.007171630859375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 586829540, + "step": 4646 + }, + { + "epoch": 1.1920733662540883, + "grad_norm": 46.269813537597656, + "learning_rate": 5e-06, + "loss": 0.8701, + "num_input_tokens_seen": 586955200, + "step": 4647 + }, + { + "epoch": 1.1920733662540883, + "loss": 0.9587010145187378, + "loss_ce": 0.0045994040556252, + "loss_iou": 0.43359375, + "loss_num": 0.0174560546875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 586955200, + "step": 4647 + }, + { + "epoch": 1.1923298916180338, + "grad_norm": 37.0291748046875, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 587080492, + "step": 4648 + }, + { + "epoch": 1.1923298916180338, + "loss": 0.9467495679855347, + "loss_ce": 0.0029018791392445564, + "loss_iou": 0.44140625, + "loss_num": 0.012451171875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 587080492, + "step": 4648 + }, + { + "epoch": 1.192586416981979, + "grad_norm": 29.64719581604004, + "learning_rate": 5e-06, + "loss": 0.8031, + "num_input_tokens_seen": 587207360, + "step": 4649 + }, + { + "epoch": 1.192586416981979, + "loss": 0.7520864009857178, + "loss_ce": 0.002086362801492214, + "loss_iou": 0.349609375, + "loss_num": 0.01007080078125, + "loss_xval": 0.75, + "num_input_tokens_seen": 587207360, + "step": 4649 + }, + { + "epoch": 1.1928429423459244, + "grad_norm": 63.815574645996094, + "learning_rate": 5e-06, + "loss": 0.81, + "num_input_tokens_seen": 587334036, + "step": 4650 + }, + { + "epoch": 1.1928429423459244, + "loss": 0.6849693655967712, + "loss_ce": 0.00015494032413698733, + "loss_iou": 0.33203125, + "loss_num": 0.00457763671875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 587334036, + "step": 4650 + }, + { + "epoch": 1.1930994677098699, + "grad_norm": 45.678016662597656, + "learning_rate": 5e-06, + "loss": 0.9446, + "num_input_tokens_seen": 587460460, + "step": 4651 + }, + { + "epoch": 1.1930994677098699, + "loss": 0.8459632396697998, + "loss_ce": 0.0005042395205236971, + "loss_iou": 0.396484375, + "loss_num": 0.0106201171875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 587460460, + "step": 4651 + }, + { + "epoch": 1.1933559930738151, + "grad_norm": 32.6883430480957, + "learning_rate": 5e-06, + "loss": 0.7839, + "num_input_tokens_seen": 587586348, + "step": 4652 + }, + { + "epoch": 1.1933559930738151, + "loss": 0.7450886368751526, + "loss_ce": 0.0011921708937734365, + "loss_iou": 0.3515625, + "loss_num": 0.00799560546875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 587586348, + "step": 4652 + }, + { + "epoch": 1.1936125184377606, + "grad_norm": 32.99615478515625, + "learning_rate": 5e-06, + "loss": 0.773, + "num_input_tokens_seen": 587713368, + "step": 4653 + }, + { + "epoch": 1.1936125184377606, + "loss": 0.6471946835517883, + "loss_ce": 0.0004662003193516284, + "loss_iou": 0.298828125, + "loss_num": 0.009521484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 587713368, + "step": 4653 + }, + { + "epoch": 1.1938690438017059, + "grad_norm": 42.93080139160156, + "learning_rate": 5e-06, + "loss": 0.8121, + "num_input_tokens_seen": 587839336, + "step": 4654 + }, + { + "epoch": 1.1938690438017059, + "loss": 0.8673266768455505, + "loss_ce": 0.0025805868208408356, + "loss_iou": 0.408203125, + "loss_num": 0.0093994140625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 587839336, + "step": 4654 + }, + { + "epoch": 1.1941255691656512, + "grad_norm": 41.869651794433594, + "learning_rate": 5e-06, + "loss": 0.8742, + "num_input_tokens_seen": 587965604, + "step": 4655 + }, + { + "epoch": 1.1941255691656512, + "loss": 0.7224563360214233, + "loss_ce": 4.4179825636092573e-05, + "loss_iou": 0.34375, + "loss_num": 0.006805419921875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 587965604, + "step": 4655 + }, + { + "epoch": 1.1943820945295966, + "grad_norm": 46.68687438964844, + "learning_rate": 5e-06, + "loss": 0.7552, + "num_input_tokens_seen": 588091468, + "step": 4656 + }, + { + "epoch": 1.1943820945295966, + "loss": 0.7505638599395752, + "loss_ce": 0.0015404063742607832, + "loss_iou": 0.35546875, + "loss_num": 0.007598876953125, + "loss_xval": 0.75, + "num_input_tokens_seen": 588091468, + "step": 4656 + }, + { + "epoch": 1.194638619893542, + "grad_norm": 28.525163650512695, + "learning_rate": 5e-06, + "loss": 0.7095, + "num_input_tokens_seen": 588217952, + "step": 4657 + }, + { + "epoch": 1.194638619893542, + "loss": 0.6797398328781128, + "loss_ce": 0.002005430171266198, + "loss_iou": 0.318359375, + "loss_num": 0.00787353515625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 588217952, + "step": 4657 + }, + { + "epoch": 1.1948951452574874, + "grad_norm": 37.533973693847656, + "learning_rate": 5e-06, + "loss": 0.8981, + "num_input_tokens_seen": 588344016, + "step": 4658 + }, + { + "epoch": 1.1948951452574874, + "loss": 0.8146689534187317, + "loss_ce": 0.001192423515021801, + "loss_iou": 0.35546875, + "loss_num": 0.020751953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 588344016, + "step": 4658 + }, + { + "epoch": 1.1951516706214327, + "grad_norm": 43.70058059692383, + "learning_rate": 5e-06, + "loss": 0.9962, + "num_input_tokens_seen": 588470048, + "step": 4659 + }, + { + "epoch": 1.1951516706214327, + "loss": 1.0541913509368896, + "loss_ce": 0.0007244782173074782, + "loss_iou": 0.484375, + "loss_num": 0.0164794921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 588470048, + "step": 4659 + }, + { + "epoch": 1.1954081959853782, + "grad_norm": 32.383033752441406, + "learning_rate": 5e-06, + "loss": 0.8366, + "num_input_tokens_seen": 588595336, + "step": 4660 + }, + { + "epoch": 1.1954081959853782, + "loss": 0.8854362964630127, + "loss_ce": 0.0011589444475248456, + "loss_iou": 0.419921875, + "loss_num": 0.0089111328125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 588595336, + "step": 4660 + }, + { + "epoch": 1.1956647213493234, + "grad_norm": 34.587921142578125, + "learning_rate": 5e-06, + "loss": 0.792, + "num_input_tokens_seen": 588721424, + "step": 4661 + }, + { + "epoch": 1.1956647213493234, + "loss": 0.7776632905006409, + "loss_ce": 0.003005099017173052, + "loss_iou": 0.365234375, + "loss_num": 0.0089111328125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 588721424, + "step": 4661 + }, + { + "epoch": 1.1959212467132687, + "grad_norm": 37.1828498840332, + "learning_rate": 5e-06, + "loss": 0.8489, + "num_input_tokens_seen": 588848088, + "step": 4662 + }, + { + "epoch": 1.1959212467132687, + "loss": 0.9117839336395264, + "loss_ce": 0.007975384593009949, + "loss_iou": 0.40234375, + "loss_num": 0.019775390625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 588848088, + "step": 4662 + }, + { + "epoch": 1.1961777720772142, + "grad_norm": 61.79922103881836, + "learning_rate": 5e-06, + "loss": 0.8886, + "num_input_tokens_seen": 588973964, + "step": 4663 + }, + { + "epoch": 1.1961777720772142, + "loss": 0.7372597455978394, + "loss_ce": 0.0001992380857700482, + "loss_iou": 0.35546875, + "loss_num": 0.005523681640625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 588973964, + "step": 4663 + }, + { + "epoch": 1.1964342974411595, + "grad_norm": 49.77708435058594, + "learning_rate": 5e-06, + "loss": 0.8532, + "num_input_tokens_seen": 589099520, + "step": 4664 + }, + { + "epoch": 1.1964342974411595, + "loss": 0.726718544960022, + "loss_ce": 0.0013767345808446407, + "loss_iou": 0.337890625, + "loss_num": 0.01019287109375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 589099520, + "step": 4664 + }, + { + "epoch": 1.1966908228051047, + "grad_norm": 80.20882415771484, + "learning_rate": 5e-06, + "loss": 0.8696, + "num_input_tokens_seen": 589225108, + "step": 4665 + }, + { + "epoch": 1.1966908228051047, + "loss": 0.8056936264038086, + "loss_ce": 0.00027368406881578267, + "loss_iou": 0.37890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 589225108, + "step": 4665 + }, + { + "epoch": 1.1969473481690502, + "grad_norm": 65.33061981201172, + "learning_rate": 5e-06, + "loss": 0.8635, + "num_input_tokens_seen": 589351400, + "step": 4666 + }, + { + "epoch": 1.1969473481690502, + "loss": 0.8832459449768066, + "loss_ce": 0.0006775836809538305, + "loss_iou": 0.41015625, + "loss_num": 0.0125732421875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 589351400, + "step": 4666 + }, + { + "epoch": 1.1972038735329955, + "grad_norm": 51.592071533203125, + "learning_rate": 5e-06, + "loss": 0.8721, + "num_input_tokens_seen": 589478016, + "step": 4667 + }, + { + "epoch": 1.1972038735329955, + "loss": 1.0552279949188232, + "loss_ce": 0.005423299036920071, + "loss_iou": 0.482421875, + "loss_num": 0.01708984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 589478016, + "step": 4667 + }, + { + "epoch": 1.197460398896941, + "grad_norm": 38.68691635131836, + "learning_rate": 5e-06, + "loss": 0.7754, + "num_input_tokens_seen": 589602952, + "step": 4668 + }, + { + "epoch": 1.197460398896941, + "loss": 0.8097430467605591, + "loss_ce": 0.0006609877455048263, + "loss_iou": 0.384765625, + "loss_num": 0.008056640625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 589602952, + "step": 4668 + }, + { + "epoch": 1.1977169242608863, + "grad_norm": 40.648616790771484, + "learning_rate": 5e-06, + "loss": 0.8856, + "num_input_tokens_seen": 589729420, + "step": 4669 + }, + { + "epoch": 1.1977169242608863, + "loss": 0.9962791204452515, + "loss_ce": 0.0026267440989613533, + "loss_iou": 0.44921875, + "loss_num": 0.01904296875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 589729420, + "step": 4669 + }, + { + "epoch": 1.1979734496248318, + "grad_norm": 44.97248077392578, + "learning_rate": 5e-06, + "loss": 0.8745, + "num_input_tokens_seen": 589854972, + "step": 4670 + }, + { + "epoch": 1.1979734496248318, + "loss": 0.7775657773017883, + "loss_ce": 0.0002219800662714988, + "loss_iou": 0.36328125, + "loss_num": 0.01043701171875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 589854972, + "step": 4670 + }, + { + "epoch": 1.198229974988777, + "grad_norm": 46.984134674072266, + "learning_rate": 5e-06, + "loss": 0.8511, + "num_input_tokens_seen": 589982252, + "step": 4671 + }, + { + "epoch": 1.198229974988777, + "loss": 0.7204253673553467, + "loss_ce": 0.0011870871530845761, + "loss_iou": 0.337890625, + "loss_num": 0.008544921875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 589982252, + "step": 4671 + }, + { + "epoch": 1.1984865003527223, + "grad_norm": 50.54662322998047, + "learning_rate": 5e-06, + "loss": 0.7082, + "num_input_tokens_seen": 590109300, + "step": 4672 + }, + { + "epoch": 1.1984865003527223, + "loss": 0.7641041278839111, + "loss_ce": 0.0006764218560419977, + "loss_iou": 0.3671875, + "loss_num": 0.005767822265625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 590109300, + "step": 4672 + }, + { + "epoch": 1.1987430257166678, + "grad_norm": 40.06825637817383, + "learning_rate": 5e-06, + "loss": 0.9101, + "num_input_tokens_seen": 590235124, + "step": 4673 + }, + { + "epoch": 1.1987430257166678, + "loss": 0.8182198405265808, + "loss_ce": 0.0015694622416049242, + "loss_iou": 0.380859375, + "loss_num": 0.0111083984375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 590235124, + "step": 4673 + }, + { + "epoch": 1.198999551080613, + "grad_norm": 26.824068069458008, + "learning_rate": 5e-06, + "loss": 0.8488, + "num_input_tokens_seen": 590360964, + "step": 4674 + }, + { + "epoch": 1.198999551080613, + "loss": 0.7574440836906433, + "loss_ce": 0.000608118949458003, + "loss_iou": 0.35546875, + "loss_num": 0.00927734375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 590360964, + "step": 4674 + }, + { + "epoch": 1.1992560764445583, + "grad_norm": 25.719064712524414, + "learning_rate": 5e-06, + "loss": 0.8165, + "num_input_tokens_seen": 590485804, + "step": 4675 + }, + { + "epoch": 1.1992560764445583, + "loss": 0.6556724309921265, + "loss_ce": 0.0001547814317746088, + "loss_iou": 0.31640625, + "loss_num": 0.004425048828125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 590485804, + "step": 4675 + }, + { + "epoch": 1.1995126018085038, + "grad_norm": 29.55878257751465, + "learning_rate": 5e-06, + "loss": 0.8612, + "num_input_tokens_seen": 590611888, + "step": 4676 + }, + { + "epoch": 1.1995126018085038, + "loss": 0.7521672248840332, + "loss_ce": 0.00021405494771897793, + "loss_iou": 0.359375, + "loss_num": 0.00665283203125, + "loss_xval": 0.75, + "num_input_tokens_seen": 590611888, + "step": 4676 + }, + { + "epoch": 1.199769127172449, + "grad_norm": 32.51898193359375, + "learning_rate": 5e-06, + "loss": 0.8105, + "num_input_tokens_seen": 590739996, + "step": 4677 + }, + { + "epoch": 1.199769127172449, + "loss": 0.7716602087020874, + "loss_ce": 0.0006641586660407484, + "loss_iou": 0.349609375, + "loss_num": 0.0142822265625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 590739996, + "step": 4677 + }, + { + "epoch": 1.2000256525363946, + "grad_norm": 40.99172592163086, + "learning_rate": 5e-06, + "loss": 0.8235, + "num_input_tokens_seen": 590866092, + "step": 4678 + }, + { + "epoch": 1.2000256525363946, + "loss": 0.8204472064971924, + "loss_ce": 0.001599551527760923, + "loss_iou": 0.38671875, + "loss_num": 0.00885009765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 590866092, + "step": 4678 + }, + { + "epoch": 1.2002821779003399, + "grad_norm": 53.06911087036133, + "learning_rate": 5e-06, + "loss": 0.8242, + "num_input_tokens_seen": 590992320, + "step": 4679 + }, + { + "epoch": 1.2002821779003399, + "loss": 0.9096395969390869, + "loss_ce": 0.0014364407397806644, + "loss_iou": 0.41796875, + "loss_num": 0.0147705078125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 590992320, + "step": 4679 + }, + { + "epoch": 1.2005387032642854, + "grad_norm": 58.42938995361328, + "learning_rate": 5e-06, + "loss": 0.8196, + "num_input_tokens_seen": 591118196, + "step": 4680 + }, + { + "epoch": 1.2005387032642854, + "loss": 0.727676510810852, + "loss_ce": 0.00013741104339715093, + "loss_iou": 0.34375, + "loss_num": 0.007659912109375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 591118196, + "step": 4680 + }, + { + "epoch": 1.2007952286282306, + "grad_norm": 43.89720153808594, + "learning_rate": 5e-06, + "loss": 0.9585, + "num_input_tokens_seen": 591243120, + "step": 4681 + }, + { + "epoch": 1.2007952286282306, + "loss": 1.17656672000885, + "loss_ce": 0.006156535353511572, + "loss_iou": 0.51953125, + "loss_num": 0.0257568359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 591243120, + "step": 4681 + }, + { + "epoch": 1.201051753992176, + "grad_norm": 33.516448974609375, + "learning_rate": 5e-06, + "loss": 0.9157, + "num_input_tokens_seen": 591369620, + "step": 4682 + }, + { + "epoch": 1.201051753992176, + "loss": 0.7308672666549683, + "loss_ce": 0.003328250488266349, + "loss_iou": 0.3359375, + "loss_num": 0.01080322265625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 591369620, + "step": 4682 + }, + { + "epoch": 1.2013082793561214, + "grad_norm": 27.665977478027344, + "learning_rate": 5e-06, + "loss": 0.8149, + "num_input_tokens_seen": 591494692, + "step": 4683 + }, + { + "epoch": 1.2013082793561214, + "loss": 0.7366397380828857, + "loss_ce": 0.0005556833930313587, + "loss_iou": 0.349609375, + "loss_num": 0.00750732421875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 591494692, + "step": 4683 + }, + { + "epoch": 1.2015648047200667, + "grad_norm": 36.0884895324707, + "learning_rate": 5e-06, + "loss": 0.7865, + "num_input_tokens_seen": 591620884, + "step": 4684 + }, + { + "epoch": 1.2015648047200667, + "loss": 0.8894338607788086, + "loss_ce": 0.0034474984277039766, + "loss_iou": 0.404296875, + "loss_num": 0.01513671875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 591620884, + "step": 4684 + }, + { + "epoch": 1.2018213300840122, + "grad_norm": 42.307884216308594, + "learning_rate": 5e-06, + "loss": 0.9956, + "num_input_tokens_seen": 591747560, + "step": 4685 + }, + { + "epoch": 1.2018213300840122, + "loss": 0.7300618886947632, + "loss_ce": 0.0010579730151221156, + "loss_iou": 0.3515625, + "loss_num": 0.005340576171875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 591747560, + "step": 4685 + }, + { + "epoch": 1.2020778554479574, + "grad_norm": 35.45066833496094, + "learning_rate": 5e-06, + "loss": 0.8416, + "num_input_tokens_seen": 591873828, + "step": 4686 + }, + { + "epoch": 1.2020778554479574, + "loss": 0.8106290102005005, + "loss_ce": 8.211834210669622e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01513671875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 591873828, + "step": 4686 + }, + { + "epoch": 1.2023343808119027, + "grad_norm": 44.37169647216797, + "learning_rate": 5e-06, + "loss": 0.8405, + "num_input_tokens_seen": 592001320, + "step": 4687 + }, + { + "epoch": 1.2023343808119027, + "loss": 0.7759294509887695, + "loss_ce": 0.0010270995553582907, + "loss_iou": 0.3671875, + "loss_num": 0.008544921875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 592001320, + "step": 4687 + }, + { + "epoch": 1.2025909061758482, + "grad_norm": 50.855751037597656, + "learning_rate": 5e-06, + "loss": 0.9399, + "num_input_tokens_seen": 592127072, + "step": 4688 + }, + { + "epoch": 1.2025909061758482, + "loss": 0.8274017572402954, + "loss_ce": 0.0007415386498905718, + "loss_iou": 0.380859375, + "loss_num": 0.01275634765625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 592127072, + "step": 4688 + }, + { + "epoch": 1.2028474315397935, + "grad_norm": 46.25652313232422, + "learning_rate": 5e-06, + "loss": 0.86, + "num_input_tokens_seen": 592253156, + "step": 4689 + }, + { + "epoch": 1.2028474315397935, + "loss": 0.6704822778701782, + "loss_ce": 0.0003162251668982208, + "loss_iou": 0.32421875, + "loss_num": 0.00482177734375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 592253156, + "step": 4689 + }, + { + "epoch": 1.203103956903739, + "grad_norm": 57.259883880615234, + "learning_rate": 5e-06, + "loss": 0.812, + "num_input_tokens_seen": 592379368, + "step": 4690 + }, + { + "epoch": 1.203103956903739, + "loss": 0.7929420471191406, + "loss_ce": 0.00046154114534147084, + "loss_iou": 0.375, + "loss_num": 0.0084228515625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 592379368, + "step": 4690 + }, + { + "epoch": 1.2033604822676842, + "grad_norm": 54.14265823364258, + "learning_rate": 5e-06, + "loss": 0.9507, + "num_input_tokens_seen": 592506932, + "step": 4691 + }, + { + "epoch": 1.2033604822676842, + "loss": 0.8070868253707886, + "loss_ce": 0.000690333778038621, + "loss_iou": 0.3828125, + "loss_num": 0.0079345703125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 592506932, + "step": 4691 + }, + { + "epoch": 1.2036170076316295, + "grad_norm": 43.23212432861328, + "learning_rate": 5e-06, + "loss": 0.7854, + "num_input_tokens_seen": 592632196, + "step": 4692 + }, + { + "epoch": 1.2036170076316295, + "loss": 0.7121663093566895, + "loss_ce": 0.004646782297641039, + "loss_iou": 0.3359375, + "loss_num": 0.007659912109375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 592632196, + "step": 4692 + }, + { + "epoch": 1.203873532995575, + "grad_norm": 44.388511657714844, + "learning_rate": 5e-06, + "loss": 0.8255, + "num_input_tokens_seen": 592758824, + "step": 4693 + }, + { + "epoch": 1.203873532995575, + "loss": 0.7963682413101196, + "loss_ce": 0.00095805135788396, + "loss_iou": 0.373046875, + "loss_num": 0.0098876953125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 592758824, + "step": 4693 + }, + { + "epoch": 1.2041300583595203, + "grad_norm": 50.6263542175293, + "learning_rate": 5e-06, + "loss": 0.7885, + "num_input_tokens_seen": 592886724, + "step": 4694 + }, + { + "epoch": 1.2041300583595203, + "loss": 0.8460016250610352, + "loss_ce": 0.0002984994789585471, + "loss_iou": 0.3828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 592886724, + "step": 4694 + }, + { + "epoch": 1.2043865837234657, + "grad_norm": 51.91606903076172, + "learning_rate": 5e-06, + "loss": 0.8392, + "num_input_tokens_seen": 593014008, + "step": 4695 + }, + { + "epoch": 1.2043865837234657, + "loss": 0.7696671485900879, + "loss_ce": 0.004042173735797405, + "loss_iou": 0.36328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 593014008, + "step": 4695 + }, + { + "epoch": 1.204643109087411, + "grad_norm": 34.18097686767578, + "learning_rate": 5e-06, + "loss": 0.809, + "num_input_tokens_seen": 593140656, + "step": 4696 + }, + { + "epoch": 1.204643109087411, + "loss": 1.1126710176467896, + "loss_ce": 0.003051897045224905, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 593140656, + "step": 4696 + }, + { + "epoch": 1.2048996344513563, + "grad_norm": 41.32080841064453, + "learning_rate": 5e-06, + "loss": 0.8997, + "num_input_tokens_seen": 593266568, + "step": 4697 + }, + { + "epoch": 1.2048996344513563, + "loss": 0.9508911371231079, + "loss_ce": 0.0002075146185234189, + "loss_iou": 0.447265625, + "loss_num": 0.011474609375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 593266568, + "step": 4697 + }, + { + "epoch": 1.2051561598153018, + "grad_norm": 58.58139419555664, + "learning_rate": 5e-06, + "loss": 1.037, + "num_input_tokens_seen": 593392528, + "step": 4698 + }, + { + "epoch": 1.2051561598153018, + "loss": 1.200282096862793, + "loss_ce": 8.675708522787318e-05, + "loss_iou": 0.546875, + "loss_num": 0.021240234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 593392528, + "step": 4698 + }, + { + "epoch": 1.205412685179247, + "grad_norm": 48.242225646972656, + "learning_rate": 5e-06, + "loss": 0.9947, + "num_input_tokens_seen": 593518856, + "step": 4699 + }, + { + "epoch": 1.205412685179247, + "loss": 0.9958717226982117, + "loss_ce": 0.0012427996844053268, + "loss_iou": 0.447265625, + "loss_num": 0.019775390625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 593518856, + "step": 4699 + }, + { + "epoch": 1.2056692105431925, + "grad_norm": 43.049560546875, + "learning_rate": 5e-06, + "loss": 0.8336, + "num_input_tokens_seen": 593647216, + "step": 4700 + }, + { + "epoch": 1.2056692105431925, + "loss": 0.9743956923484802, + "loss_ce": 0.003204255597665906, + "loss_iou": 0.44921875, + "loss_num": 0.01458740234375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 593647216, + "step": 4700 + }, + { + "epoch": 1.2059257359071378, + "grad_norm": 41.529476165771484, + "learning_rate": 5e-06, + "loss": 0.9216, + "num_input_tokens_seen": 593772556, + "step": 4701 + }, + { + "epoch": 1.2059257359071378, + "loss": 1.0307214260101318, + "loss_ce": 0.00044800550676882267, + "loss_iou": 0.46484375, + "loss_num": 0.020263671875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 593772556, + "step": 4701 + }, + { + "epoch": 1.206182261271083, + "grad_norm": 25.759618759155273, + "learning_rate": 5e-06, + "loss": 0.8499, + "num_input_tokens_seen": 593899296, + "step": 4702 + }, + { + "epoch": 1.206182261271083, + "loss": 0.6942624449729919, + "loss_ce": 0.0009030892979353666, + "loss_iou": 0.328125, + "loss_num": 0.00775146484375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 593899296, + "step": 4702 + }, + { + "epoch": 1.2064387866350286, + "grad_norm": 37.496524810791016, + "learning_rate": 5e-06, + "loss": 0.7809, + "num_input_tokens_seen": 594026272, + "step": 4703 + }, + { + "epoch": 1.2064387866350286, + "loss": 0.5275313854217529, + "loss_ce": 0.00018757552606984973, + "loss_iou": 0.2451171875, + "loss_num": 0.007415771484375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 594026272, + "step": 4703 + }, + { + "epoch": 1.2066953119989738, + "grad_norm": 51.5787467956543, + "learning_rate": 5e-06, + "loss": 0.9175, + "num_input_tokens_seen": 594152548, + "step": 4704 + }, + { + "epoch": 1.2066953119989738, + "loss": 0.8986421823501587, + "loss_ce": 0.002401982666924596, + "loss_iou": 0.412109375, + "loss_num": 0.014892578125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 594152548, + "step": 4704 + }, + { + "epoch": 1.2069518373629193, + "grad_norm": 39.744468688964844, + "learning_rate": 5e-06, + "loss": 0.8277, + "num_input_tokens_seen": 594279676, + "step": 4705 + }, + { + "epoch": 1.2069518373629193, + "loss": 0.7738808393478394, + "loss_ce": 0.0004432910354807973, + "loss_iou": 0.369140625, + "loss_num": 0.0069580078125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 594279676, + "step": 4705 + }, + { + "epoch": 1.2072083627268646, + "grad_norm": 48.41879653930664, + "learning_rate": 5e-06, + "loss": 0.6979, + "num_input_tokens_seen": 594405168, + "step": 4706 + }, + { + "epoch": 1.2072083627268646, + "loss": 0.729809582233429, + "loss_ce": 0.0003174064331687987, + "loss_iou": 0.3359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 594405168, + "step": 4706 + }, + { + "epoch": 1.20746488809081, + "grad_norm": 44.24345397949219, + "learning_rate": 5e-06, + "loss": 0.8684, + "num_input_tokens_seen": 594531668, + "step": 4707 + }, + { + "epoch": 1.20746488809081, + "loss": 0.9710237979888916, + "loss_ce": 0.002273819874972105, + "loss_iou": 0.431640625, + "loss_num": 0.021240234375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 594531668, + "step": 4707 + }, + { + "epoch": 1.2077214134547554, + "grad_norm": 55.02375030517578, + "learning_rate": 5e-06, + "loss": 0.9193, + "num_input_tokens_seen": 594657896, + "step": 4708 + }, + { + "epoch": 1.2077214134547554, + "loss": 0.8670024871826172, + "loss_ce": 5.9162463003303856e-05, + "loss_iou": 0.400390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 594657896, + "step": 4708 + }, + { + "epoch": 1.2079779388187006, + "grad_norm": 49.19040298461914, + "learning_rate": 5e-06, + "loss": 0.8989, + "num_input_tokens_seen": 594784424, + "step": 4709 + }, + { + "epoch": 1.2079779388187006, + "loss": 0.9079356789588928, + "loss_ce": 0.00022084417287260294, + "loss_iou": 0.42578125, + "loss_num": 0.011474609375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 594784424, + "step": 4709 + }, + { + "epoch": 1.2082344641826461, + "grad_norm": 58.43973159790039, + "learning_rate": 5e-06, + "loss": 0.8249, + "num_input_tokens_seen": 594910360, + "step": 4710 + }, + { + "epoch": 1.2082344641826461, + "loss": 0.9275294542312622, + "loss_ce": 0.0007716322434134781, + "loss_iou": 0.41015625, + "loss_num": 0.02099609375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 594910360, + "step": 4710 + }, + { + "epoch": 1.2084909895465914, + "grad_norm": 42.231502532958984, + "learning_rate": 5e-06, + "loss": 0.8436, + "num_input_tokens_seen": 595036116, + "step": 4711 + }, + { + "epoch": 1.2084909895465914, + "loss": 0.7870713472366333, + "loss_ce": 0.0004502838710322976, + "loss_iou": 0.37109375, + "loss_num": 0.00927734375, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 595036116, + "step": 4711 + }, + { + "epoch": 1.2087475149105367, + "grad_norm": 55.52021026611328, + "learning_rate": 5e-06, + "loss": 0.8415, + "num_input_tokens_seen": 595162776, + "step": 4712 + }, + { + "epoch": 1.2087475149105367, + "loss": 0.8281638622283936, + "loss_ce": 0.00028300462872721255, + "loss_iou": 0.380859375, + "loss_num": 0.0133056640625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 595162776, + "step": 4712 + }, + { + "epoch": 1.2090040402744822, + "grad_norm": 50.521060943603516, + "learning_rate": 5e-06, + "loss": 0.8375, + "num_input_tokens_seen": 595289248, + "step": 4713 + }, + { + "epoch": 1.2090040402744822, + "loss": 0.8469698429107666, + "loss_ce": 0.00029014816391281784, + "loss_iou": 0.396484375, + "loss_num": 0.01055908203125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 595289248, + "step": 4713 + }, + { + "epoch": 1.2092605656384274, + "grad_norm": 47.39387512207031, + "learning_rate": 5e-06, + "loss": 0.8124, + "num_input_tokens_seen": 595415540, + "step": 4714 + }, + { + "epoch": 1.2092605656384274, + "loss": 0.962232768535614, + "loss_ce": 0.0008070093463174999, + "loss_iou": 0.43359375, + "loss_num": 0.01904296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 595415540, + "step": 4714 + }, + { + "epoch": 1.209517091002373, + "grad_norm": 46.55170822143555, + "learning_rate": 5e-06, + "loss": 0.9283, + "num_input_tokens_seen": 595540568, + "step": 4715 + }, + { + "epoch": 1.209517091002373, + "loss": 1.0562406778335571, + "loss_ce": 0.0030179526656866074, + "loss_iou": 0.48046875, + "loss_num": 0.0189208984375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 595540568, + "step": 4715 + }, + { + "epoch": 1.2097736163663182, + "grad_norm": 37.793880462646484, + "learning_rate": 5e-06, + "loss": 0.8776, + "num_input_tokens_seen": 595668460, + "step": 4716 + }, + { + "epoch": 1.2097736163663182, + "loss": 0.9116119146347046, + "loss_ce": 0.002920536557212472, + "loss_iou": 0.419921875, + "loss_num": 0.01385498046875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 595668460, + "step": 4716 + }, + { + "epoch": 1.2100301417302637, + "grad_norm": 41.18879699707031, + "learning_rate": 5e-06, + "loss": 0.871, + "num_input_tokens_seen": 595795168, + "step": 4717 + }, + { + "epoch": 1.2100301417302637, + "loss": 0.8438723683357239, + "loss_ce": 0.00012240752403158695, + "loss_iou": 0.384765625, + "loss_num": 0.01458740234375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 595795168, + "step": 4717 + }, + { + "epoch": 1.210286667094209, + "grad_norm": 52.59662628173828, + "learning_rate": 5e-06, + "loss": 0.8327, + "num_input_tokens_seen": 595922504, + "step": 4718 + }, + { + "epoch": 1.210286667094209, + "loss": 0.8634018301963806, + "loss_ce": 0.0006088363588787615, + "loss_iou": 0.392578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 595922504, + "step": 4718 + }, + { + "epoch": 1.2105431924581542, + "grad_norm": 45.294189453125, + "learning_rate": 5e-06, + "loss": 0.8803, + "num_input_tokens_seen": 596048832, + "step": 4719 + }, + { + "epoch": 1.2105431924581542, + "loss": 0.831594705581665, + "loss_ce": 0.0010282954899594188, + "loss_iou": 0.375, + "loss_num": 0.0157470703125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 596048832, + "step": 4719 + }, + { + "epoch": 1.2107997178220997, + "grad_norm": 34.26205062866211, + "learning_rate": 5e-06, + "loss": 0.867, + "num_input_tokens_seen": 596174688, + "step": 4720 + }, + { + "epoch": 1.2107997178220997, + "loss": 0.8505825400352478, + "loss_ce": 0.00048486419836990535, + "loss_iou": 0.3828125, + "loss_num": 0.0166015625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 596174688, + "step": 4720 + }, + { + "epoch": 1.211056243186045, + "grad_norm": 38.8514518737793, + "learning_rate": 5e-06, + "loss": 0.7907, + "num_input_tokens_seen": 596301536, + "step": 4721 + }, + { + "epoch": 1.211056243186045, + "loss": 0.7327573299407959, + "loss_ce": 0.0015561676118522882, + "loss_iou": 0.33984375, + "loss_num": 0.010009765625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 596301536, + "step": 4721 + }, + { + "epoch": 1.2113127685499903, + "grad_norm": 36.626094818115234, + "learning_rate": 5e-06, + "loss": 0.8735, + "num_input_tokens_seen": 596428200, + "step": 4722 + }, + { + "epoch": 1.2113127685499903, + "loss": 0.8627752065658569, + "loss_ce": 0.0004705711326096207, + "loss_iou": 0.404296875, + "loss_num": 0.01092529296875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 596428200, + "step": 4722 + }, + { + "epoch": 1.2115692939139358, + "grad_norm": 28.392742156982422, + "learning_rate": 5e-06, + "loss": 0.8799, + "num_input_tokens_seen": 596554592, + "step": 4723 + }, + { + "epoch": 1.2115692939139358, + "loss": 0.9282349348068237, + "loss_ce": 0.001233050599694252, + "loss_iou": 0.423828125, + "loss_num": 0.0162353515625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 596554592, + "step": 4723 + }, + { + "epoch": 1.211825819277881, + "grad_norm": 32.649383544921875, + "learning_rate": 5e-06, + "loss": 0.7871, + "num_input_tokens_seen": 596679992, + "step": 4724 + }, + { + "epoch": 1.211825819277881, + "loss": 0.8892539739608765, + "loss_ce": 0.0015586887020617723, + "loss_iou": 0.404296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 596679992, + "step": 4724 + }, + { + "epoch": 1.2120823446418265, + "grad_norm": 37.918731689453125, + "learning_rate": 5e-06, + "loss": 0.9147, + "num_input_tokens_seen": 596806712, + "step": 4725 + }, + { + "epoch": 1.2120823446418265, + "loss": 0.8720120191574097, + "loss_ce": 0.0009182070498354733, + "loss_iou": 0.388671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 596806712, + "step": 4725 + }, + { + "epoch": 1.2123388700057718, + "grad_norm": 51.11736297607422, + "learning_rate": 5e-06, + "loss": 0.8821, + "num_input_tokens_seen": 596933472, + "step": 4726 + }, + { + "epoch": 1.2123388700057718, + "loss": 1.1661514043807983, + "loss_ce": 0.0006240421207621694, + "loss_iou": 0.51171875, + "loss_num": 0.028076171875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 596933472, + "step": 4726 + }, + { + "epoch": 1.2125953953697173, + "grad_norm": 38.51396179199219, + "learning_rate": 5e-06, + "loss": 0.9462, + "num_input_tokens_seen": 597059868, + "step": 4727 + }, + { + "epoch": 1.2125953953697173, + "loss": 0.938277542591095, + "loss_ce": 0.00028925033984705806, + "loss_iou": 0.435546875, + "loss_num": 0.01348876953125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 597059868, + "step": 4727 + }, + { + "epoch": 1.2128519207336625, + "grad_norm": 41.25739288330078, + "learning_rate": 5e-06, + "loss": 0.7516, + "num_input_tokens_seen": 597187112, + "step": 4728 + }, + { + "epoch": 1.2128519207336625, + "loss": 0.8274070024490356, + "loss_ce": 0.00025855566491372883, + "loss_iou": 0.38671875, + "loss_num": 0.01104736328125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 597187112, + "step": 4728 + }, + { + "epoch": 1.2131084460976078, + "grad_norm": 42.66353988647461, + "learning_rate": 5e-06, + "loss": 0.8497, + "num_input_tokens_seen": 597313128, + "step": 4729 + }, + { + "epoch": 1.2131084460976078, + "loss": 0.8456262946128845, + "loss_ce": 0.00016732528456486762, + "loss_iou": 0.400390625, + "loss_num": 0.00909423828125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 597313128, + "step": 4729 + }, + { + "epoch": 1.2133649714615533, + "grad_norm": 36.667144775390625, + "learning_rate": 5e-06, + "loss": 0.8145, + "num_input_tokens_seen": 597439488, + "step": 4730 + }, + { + "epoch": 1.2133649714615533, + "loss": 0.7247690558433533, + "loss_ce": 0.00015968694060575217, + "loss_iou": 0.34375, + "loss_num": 0.007415771484375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 597439488, + "step": 4730 + }, + { + "epoch": 1.2136214968254986, + "grad_norm": 45.589229583740234, + "learning_rate": 5e-06, + "loss": 0.8933, + "num_input_tokens_seen": 597565624, + "step": 4731 + }, + { + "epoch": 1.2136214968254986, + "loss": 0.9644152522087097, + "loss_ce": 5.979950219625607e-05, + "loss_iou": 0.4375, + "loss_num": 0.017822265625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 597565624, + "step": 4731 + }, + { + "epoch": 1.213878022189444, + "grad_norm": 48.893035888671875, + "learning_rate": 5e-06, + "loss": 0.8049, + "num_input_tokens_seen": 597691996, + "step": 4732 + }, + { + "epoch": 1.213878022189444, + "loss": 0.8864907026290894, + "loss_ce": 0.00026023300597444177, + "loss_iou": 0.419921875, + "loss_num": 0.00885009765625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 597691996, + "step": 4732 + }, + { + "epoch": 1.2141345475533893, + "grad_norm": 176.50762939453125, + "learning_rate": 5e-06, + "loss": 0.9639, + "num_input_tokens_seen": 597818200, + "step": 4733 + }, + { + "epoch": 1.2141345475533893, + "loss": 0.7966897487640381, + "loss_ce": 0.0003029863000847399, + "loss_iou": 0.37890625, + "loss_num": 0.00738525390625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 597818200, + "step": 4733 + }, + { + "epoch": 1.2143910729173346, + "grad_norm": 49.131900787353516, + "learning_rate": 5e-06, + "loss": 0.8888, + "num_input_tokens_seen": 597943708, + "step": 4734 + }, + { + "epoch": 1.2143910729173346, + "loss": 0.978903591632843, + "loss_ce": 0.0028293898794800043, + "loss_iou": 0.44921875, + "loss_num": 0.01556396484375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 597943708, + "step": 4734 + }, + { + "epoch": 1.21464759828128, + "grad_norm": 36.95499801635742, + "learning_rate": 5e-06, + "loss": 0.8273, + "num_input_tokens_seen": 598069568, + "step": 4735 + }, + { + "epoch": 1.21464759828128, + "loss": 0.8185850381851196, + "loss_ce": 0.0012022381415590644, + "loss_iou": 0.392578125, + "loss_num": 0.00665283203125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 598069568, + "step": 4735 + }, + { + "epoch": 1.2149041236452254, + "grad_norm": 57.81605529785156, + "learning_rate": 5e-06, + "loss": 0.6808, + "num_input_tokens_seen": 598196980, + "step": 4736 + }, + { + "epoch": 1.2149041236452254, + "loss": 0.5676996111869812, + "loss_ce": 7.267329783644527e-05, + "loss_iou": 0.2734375, + "loss_num": 0.004241943359375, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 598196980, + "step": 4736 + }, + { + "epoch": 1.2151606490091709, + "grad_norm": 50.31293869018555, + "learning_rate": 5e-06, + "loss": 0.8144, + "num_input_tokens_seen": 598321728, + "step": 4737 + }, + { + "epoch": 1.2151606490091709, + "loss": 0.8509231805801392, + "loss_ce": 0.003755206009373069, + "loss_iou": 0.384765625, + "loss_num": 0.01544189453125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 598321728, + "step": 4737 + }, + { + "epoch": 1.2154171743731161, + "grad_norm": 23.222808837890625, + "learning_rate": 5e-06, + "loss": 0.6401, + "num_input_tokens_seen": 598448204, + "step": 4738 + }, + { + "epoch": 1.2154171743731161, + "loss": 0.6502104997634888, + "loss_ce": 0.0005522611318156123, + "loss_iou": 0.3125, + "loss_num": 0.00482177734375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 598448204, + "step": 4738 + }, + { + "epoch": 1.2156736997370614, + "grad_norm": 41.279415130615234, + "learning_rate": 5e-06, + "loss": 0.7521, + "num_input_tokens_seen": 598574212, + "step": 4739 + }, + { + "epoch": 1.2156736997370614, + "loss": 0.8236714601516724, + "loss_ce": 0.0004292973899282515, + "loss_iou": 0.37890625, + "loss_num": 0.01318359375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 598574212, + "step": 4739 + }, + { + "epoch": 1.215930225101007, + "grad_norm": 55.85179901123047, + "learning_rate": 5e-06, + "loss": 0.782, + "num_input_tokens_seen": 598701844, + "step": 4740 + }, + { + "epoch": 1.215930225101007, + "loss": 0.7880877256393433, + "loss_ce": 0.00024598141317255795, + "loss_iou": 0.365234375, + "loss_num": 0.01171875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 598701844, + "step": 4740 + }, + { + "epoch": 1.2161867504649522, + "grad_norm": 43.085426330566406, + "learning_rate": 5e-06, + "loss": 0.7564, + "num_input_tokens_seen": 598829208, + "step": 4741 + }, + { + "epoch": 1.2161867504649522, + "loss": 0.8001439571380615, + "loss_ce": 0.0005834367475472391, + "loss_iou": 0.369140625, + "loss_num": 0.01171875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 598829208, + "step": 4741 + }, + { + "epoch": 1.2164432758288977, + "grad_norm": 41.24732208251953, + "learning_rate": 5e-06, + "loss": 0.7991, + "num_input_tokens_seen": 598956344, + "step": 4742 + }, + { + "epoch": 1.2164432758288977, + "loss": 0.6961536407470703, + "loss_ce": 0.00035286351339891553, + "loss_iou": 0.330078125, + "loss_num": 0.007232666015625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 598956344, + "step": 4742 + }, + { + "epoch": 1.216699801192843, + "grad_norm": 46.96685791015625, + "learning_rate": 5e-06, + "loss": 0.8431, + "num_input_tokens_seen": 599082512, + "step": 4743 + }, + { + "epoch": 1.216699801192843, + "loss": 0.7952233552932739, + "loss_ce": 0.0003014913818333298, + "loss_iou": 0.373046875, + "loss_num": 0.010009765625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 599082512, + "step": 4743 + }, + { + "epoch": 1.2169563265567884, + "grad_norm": 45.461204528808594, + "learning_rate": 5e-06, + "loss": 0.7601, + "num_input_tokens_seen": 599208056, + "step": 4744 + }, + { + "epoch": 1.2169563265567884, + "loss": 0.6853104829788208, + "loss_ce": 0.0004960055812261999, + "loss_iou": 0.3125, + "loss_num": 0.0120849609375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 599208056, + "step": 4744 + }, + { + "epoch": 1.2172128519207337, + "grad_norm": 43.953514099121094, + "learning_rate": 5e-06, + "loss": 0.8238, + "num_input_tokens_seen": 599334128, + "step": 4745 + }, + { + "epoch": 1.2172128519207337, + "loss": 0.9511550068855286, + "loss_ce": 0.003156960243359208, + "loss_iou": 0.421875, + "loss_num": 0.0205078125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 599334128, + "step": 4745 + }, + { + "epoch": 1.217469377284679, + "grad_norm": 42.079185485839844, + "learning_rate": 5e-06, + "loss": 0.8139, + "num_input_tokens_seen": 599458564, + "step": 4746 + }, + { + "epoch": 1.217469377284679, + "loss": 0.8978749513626099, + "loss_ce": 0.0013904988300055265, + "loss_iou": 0.3984375, + "loss_num": 0.0198974609375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 599458564, + "step": 4746 + }, + { + "epoch": 1.2177259026486245, + "grad_norm": 54.93452835083008, + "learning_rate": 5e-06, + "loss": 0.938, + "num_input_tokens_seen": 599585072, + "step": 4747 + }, + { + "epoch": 1.2177259026486245, + "loss": 0.9298986196517944, + "loss_ce": 0.0016759387217462063, + "loss_iou": 0.4296875, + "loss_num": 0.01409912109375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 599585072, + "step": 4747 + }, + { + "epoch": 1.2179824280125697, + "grad_norm": 75.22891998291016, + "learning_rate": 5e-06, + "loss": 0.8622, + "num_input_tokens_seen": 599711832, + "step": 4748 + }, + { + "epoch": 1.2179824280125697, + "loss": 0.9479256868362427, + "loss_ce": 0.0028573786839842796, + "loss_iou": 0.419921875, + "loss_num": 0.0213623046875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 599711832, + "step": 4748 + }, + { + "epoch": 1.218238953376515, + "grad_norm": 50.59974670410156, + "learning_rate": 5e-06, + "loss": 0.7991, + "num_input_tokens_seen": 599838056, + "step": 4749 + }, + { + "epoch": 1.218238953376515, + "loss": 0.7107808589935303, + "loss_ce": 0.003017193404957652, + "loss_iou": 0.33984375, + "loss_num": 0.00555419921875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 599838056, + "step": 4749 + }, + { + "epoch": 1.2184954787404605, + "grad_norm": 39.92596435546875, + "learning_rate": 5e-06, + "loss": 0.8353, + "num_input_tokens_seen": 599964680, + "step": 4750 + }, + { + "epoch": 1.2184954787404605, + "eval_icons_CIoU": 0.32718271017074585, + "eval_icons_GIoU": 0.2887353301048279, + "eval_icons_IoU": 0.49273164570331573, + "eval_icons_MAE_all": 0.021599503234028816, + "eval_icons_MAE_h": 0.03046303056180477, + "eval_icons_MAE_w": 0.04236280918121338, + "eval_icons_MAE_x_boxes": 0.04186774976551533, + "eval_icons_MAE_y_boxes": 0.028956228867173195, + "eval_icons_NUM_probability": 0.9998744428157806, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 1.52036714553833, + "eval_icons_loss_ce": 4.73630952910753e-05, + "eval_icons_loss_iou": 0.6951904296875, + "eval_icons_loss_num": 0.02354717254638672, + "eval_icons_loss_xval": 1.50927734375, + "eval_icons_runtime": 57.6167, + "eval_icons_samples_per_second": 0.868, + "eval_icons_steps_per_second": 0.035, + "num_input_tokens_seen": 599964680, + "step": 4750 + }, + { + "epoch": 1.2184954787404605, + "eval_screenspot_CIoU": 0.12035692979892094, + "eval_screenspot_GIoU": 0.10336928938825925, + "eval_screenspot_IoU": 0.2940397957960765, + "eval_screenspot_MAE_all": 0.07984606424967448, + "eval_screenspot_MAE_h": 0.07090425118803978, + "eval_screenspot_MAE_w": 0.12556160738070807, + "eval_screenspot_MAE_x_boxes": 0.1191558043162028, + "eval_screenspot_MAE_y_boxes": 0.05418539543946584, + "eval_screenspot_NUM_probability": 0.9999515811602274, + "eval_screenspot_inside_bbox": 0.6195833285649618, + "eval_screenspot_loss": 2.236574649810791, + "eval_screenspot_loss_ce": 0.006025688101847966, + "eval_screenspot_loss_iou": 0.9197591145833334, + "eval_screenspot_loss_num": 0.0849151611328125, + "eval_screenspot_loss_xval": 2.2652994791666665, + "eval_screenspot_runtime": 93.5297, + "eval_screenspot_samples_per_second": 0.952, + "eval_screenspot_steps_per_second": 0.032, + "num_input_tokens_seen": 599964680, + "step": 4750 + }, + { + "epoch": 1.2184954787404605, + "loss": 2.2678704261779785, + "loss_ce": 0.003222143277525902, + "loss_iou": 0.9375, + "loss_num": 0.0771484375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 599964680, + "step": 4750 + }, + { + "epoch": 1.2187520041044058, + "grad_norm": 19.07582664489746, + "learning_rate": 5e-06, + "loss": 0.8808, + "num_input_tokens_seen": 600090504, + "step": 4751 + }, + { + "epoch": 1.2187520041044058, + "loss": 0.948464035987854, + "loss_ce": 0.00510466517880559, + "loss_iou": 0.443359375, + "loss_num": 0.0108642578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 600090504, + "step": 4751 + }, + { + "epoch": 1.2190085294683513, + "grad_norm": 50.30540466308594, + "learning_rate": 5e-06, + "loss": 0.8978, + "num_input_tokens_seen": 600216836, + "step": 4752 + }, + { + "epoch": 1.2190085294683513, + "loss": 0.8345413208007812, + "loss_ce": 0.00031285439035855234, + "loss_iou": 0.376953125, + "loss_num": 0.01611328125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 600216836, + "step": 4752 + }, + { + "epoch": 1.2192650548322965, + "grad_norm": 22.537038803100586, + "learning_rate": 5e-06, + "loss": 0.9082, + "num_input_tokens_seen": 600343580, + "step": 4753 + }, + { + "epoch": 1.2192650548322965, + "loss": 0.8719031810760498, + "loss_ce": 0.0020301304757595062, + "loss_iou": 0.404296875, + "loss_num": 0.0118408203125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 600343580, + "step": 4753 + }, + { + "epoch": 1.219521580196242, + "grad_norm": 23.62120246887207, + "learning_rate": 5e-06, + "loss": 0.8108, + "num_input_tokens_seen": 600469504, + "step": 4754 + }, + { + "epoch": 1.219521580196242, + "loss": 0.8330411314964294, + "loss_ce": 0.0007657122332602739, + "loss_iou": 0.388671875, + "loss_num": 0.01068115234375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 600469504, + "step": 4754 + }, + { + "epoch": 1.2197781055601873, + "grad_norm": 26.64427375793457, + "learning_rate": 5e-06, + "loss": 0.8665, + "num_input_tokens_seen": 600596768, + "step": 4755 + }, + { + "epoch": 1.2197781055601873, + "loss": 0.7889052629470825, + "loss_ce": 0.0008193481480702758, + "loss_iou": 0.361328125, + "loss_num": 0.0126953125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 600596768, + "step": 4755 + }, + { + "epoch": 1.2200346309241326, + "grad_norm": 44.88630676269531, + "learning_rate": 5e-06, + "loss": 0.8487, + "num_input_tokens_seen": 600723824, + "step": 4756 + }, + { + "epoch": 1.2200346309241326, + "loss": 0.9842228293418884, + "loss_ce": 0.0008243804331868887, + "loss_iou": 0.462890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 600723824, + "step": 4756 + }, + { + "epoch": 1.220291156288078, + "grad_norm": 28.47319793701172, + "learning_rate": 5e-06, + "loss": 0.7949, + "num_input_tokens_seen": 600849744, + "step": 4757 + }, + { + "epoch": 1.220291156288078, + "loss": 0.8828399181365967, + "loss_ce": 0.0014922046102583408, + "loss_iou": 0.400390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 600849744, + "step": 4757 + }, + { + "epoch": 1.2205476816520233, + "grad_norm": 33.03736877441406, + "learning_rate": 5e-06, + "loss": 0.8982, + "num_input_tokens_seen": 600976260, + "step": 4758 + }, + { + "epoch": 1.2205476816520233, + "loss": 0.892457902431488, + "loss_ce": 0.00036802445538342, + "loss_iou": 0.412109375, + "loss_num": 0.01385498046875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 600976260, + "step": 4758 + }, + { + "epoch": 1.2208042070159686, + "grad_norm": 31.39913558959961, + "learning_rate": 5e-06, + "loss": 0.9025, + "num_input_tokens_seen": 601101028, + "step": 4759 + }, + { + "epoch": 1.2208042070159686, + "loss": 0.8718372583389282, + "loss_ce": 0.002452526707202196, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 601101028, + "step": 4759 + }, + { + "epoch": 1.221060732379914, + "grad_norm": 27.543079376220703, + "learning_rate": 5e-06, + "loss": 0.824, + "num_input_tokens_seen": 601226648, + "step": 4760 + }, + { + "epoch": 1.221060732379914, + "loss": 0.8160004615783691, + "loss_ce": 0.0013032081769779325, + "loss_iou": 0.384765625, + "loss_num": 0.00921630859375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 601226648, + "step": 4760 + }, + { + "epoch": 1.2213172577438594, + "grad_norm": 35.303218841552734, + "learning_rate": 5e-06, + "loss": 0.8795, + "num_input_tokens_seen": 601353616, + "step": 4761 + }, + { + "epoch": 1.2213172577438594, + "loss": 0.8158635497093201, + "loss_ce": 0.00043385877506807446, + "loss_iou": 0.373046875, + "loss_num": 0.01373291015625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 601353616, + "step": 4761 + }, + { + "epoch": 1.2215737831078048, + "grad_norm": 29.56807518005371, + "learning_rate": 5e-06, + "loss": 0.8547, + "num_input_tokens_seen": 601479948, + "step": 4762 + }, + { + "epoch": 1.2215737831078048, + "loss": 0.946003794670105, + "loss_ce": 0.0009353643981739879, + "loss_iou": 0.423828125, + "loss_num": 0.01953125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 601479948, + "step": 4762 + }, + { + "epoch": 1.2218303084717501, + "grad_norm": 28.149768829345703, + "learning_rate": 5e-06, + "loss": 0.7609, + "num_input_tokens_seen": 601607136, + "step": 4763 + }, + { + "epoch": 1.2218303084717501, + "loss": 0.7574147582054138, + "loss_ce": 0.0025319445412606, + "loss_iou": 0.349609375, + "loss_num": 0.01080322265625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 601607136, + "step": 4763 + }, + { + "epoch": 1.2220868338356956, + "grad_norm": 32.912193298339844, + "learning_rate": 5e-06, + "loss": 0.8737, + "num_input_tokens_seen": 601732460, + "step": 4764 + }, + { + "epoch": 1.2220868338356956, + "loss": 0.7749499082565308, + "loss_ce": 0.0005358686903491616, + "loss_iou": 0.357421875, + "loss_num": 0.01220703125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 601732460, + "step": 4764 + }, + { + "epoch": 1.2223433591996409, + "grad_norm": 51.268104553222656, + "learning_rate": 5e-06, + "loss": 0.9608, + "num_input_tokens_seen": 601858828, + "step": 4765 + }, + { + "epoch": 1.2223433591996409, + "loss": 0.7851808667182922, + "loss_ce": 0.00026872724993154407, + "loss_iou": 0.3671875, + "loss_num": 0.010009765625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 601858828, + "step": 4765 + }, + { + "epoch": 1.2225998845635861, + "grad_norm": 49.083675384521484, + "learning_rate": 5e-06, + "loss": 0.8196, + "num_input_tokens_seen": 601985756, + "step": 4766 + }, + { + "epoch": 1.2225998845635861, + "loss": 0.7626669406890869, + "loss_ce": 0.0016805990599095821, + "loss_iou": 0.33984375, + "loss_num": 0.0166015625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 601985756, + "step": 4766 + }, + { + "epoch": 1.2228564099275316, + "grad_norm": 47.85071563720703, + "learning_rate": 5e-06, + "loss": 0.8149, + "num_input_tokens_seen": 602112568, + "step": 4767 + }, + { + "epoch": 1.2228564099275316, + "loss": 0.6594139337539673, + "loss_ce": 0.0004783686308655888, + "loss_iou": 0.31640625, + "loss_num": 0.005218505859375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 602112568, + "step": 4767 + }, + { + "epoch": 1.223112935291477, + "grad_norm": 56.944252014160156, + "learning_rate": 5e-06, + "loss": 0.7413, + "num_input_tokens_seen": 602240072, + "step": 4768 + }, + { + "epoch": 1.223112935291477, + "loss": 0.7971140146255493, + "loss_ce": 0.00023901810345705599, + "loss_iou": 0.376953125, + "loss_num": 0.008544921875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 602240072, + "step": 4768 + }, + { + "epoch": 1.2233694606554222, + "grad_norm": 37.38105010986328, + "learning_rate": 5e-06, + "loss": 0.9469, + "num_input_tokens_seen": 602365636, + "step": 4769 + }, + { + "epoch": 1.2233694606554222, + "loss": 1.0167577266693115, + "loss_ce": 0.00015614864241797477, + "loss_iou": 0.466796875, + "loss_num": 0.0166015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 602365636, + "step": 4769 + }, + { + "epoch": 1.2236259860193677, + "grad_norm": 17.443819046020508, + "learning_rate": 5e-06, + "loss": 0.8893, + "num_input_tokens_seen": 602491180, + "step": 4770 + }, + { + "epoch": 1.2236259860193677, + "loss": 0.7825367450714111, + "loss_ce": 0.0005543669685721397, + "loss_iou": 0.365234375, + "loss_num": 0.01055908203125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 602491180, + "step": 4770 + }, + { + "epoch": 1.223882511383313, + "grad_norm": 30.18058967590332, + "learning_rate": 5e-06, + "loss": 0.7903, + "num_input_tokens_seen": 602616860, + "step": 4771 + }, + { + "epoch": 1.223882511383313, + "loss": 0.789797306060791, + "loss_ce": 0.0004906684625893831, + "loss_iou": 0.365234375, + "loss_num": 0.0118408203125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 602616860, + "step": 4771 + }, + { + "epoch": 1.2241390367472584, + "grad_norm": 36.23856735229492, + "learning_rate": 5e-06, + "loss": 0.7848, + "num_input_tokens_seen": 602744232, + "step": 4772 + }, + { + "epoch": 1.2241390367472584, + "loss": 0.9037405252456665, + "loss_ce": 0.0004202028503641486, + "loss_iou": 0.42578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 602744232, + "step": 4772 + }, + { + "epoch": 1.2243955621112037, + "grad_norm": 32.48878479003906, + "learning_rate": 5e-06, + "loss": 0.8816, + "num_input_tokens_seen": 602870612, + "step": 4773 + }, + { + "epoch": 1.2243955621112037, + "loss": 1.036409616470337, + "loss_ce": 0.001741634914651513, + "loss_iou": 0.46875, + "loss_num": 0.0191650390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 602870612, + "step": 4773 + }, + { + "epoch": 1.2246520874751492, + "grad_norm": 59.387298583984375, + "learning_rate": 5e-06, + "loss": 0.9686, + "num_input_tokens_seen": 602997192, + "step": 4774 + }, + { + "epoch": 1.2246520874751492, + "loss": 1.0598663091659546, + "loss_ce": 0.0005401476519182324, + "loss_iou": 0.46484375, + "loss_num": 0.0264892578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 602997192, + "step": 4774 + }, + { + "epoch": 1.2249086128390945, + "grad_norm": 249.78233337402344, + "learning_rate": 5e-06, + "loss": 0.9323, + "num_input_tokens_seen": 603122928, + "step": 4775 + }, + { + "epoch": 1.2249086128390945, + "loss": 0.9570778608322144, + "loss_ce": 0.0010231432970613241, + "loss_iou": 0.451171875, + "loss_num": 0.0106201171875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 603122928, + "step": 4775 + }, + { + "epoch": 1.2251651382030397, + "grad_norm": 22.128570556640625, + "learning_rate": 5e-06, + "loss": 0.8064, + "num_input_tokens_seen": 603249100, + "step": 4776 + }, + { + "epoch": 1.2251651382030397, + "loss": 0.8527721166610718, + "loss_ce": 0.00047719833673909307, + "loss_iou": 0.384765625, + "loss_num": 0.0166015625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 603249100, + "step": 4776 + }, + { + "epoch": 1.2254216635669852, + "grad_norm": 46.06595993041992, + "learning_rate": 5e-06, + "loss": 0.9489, + "num_input_tokens_seen": 603376368, + "step": 4777 + }, + { + "epoch": 1.2254216635669852, + "loss": 0.8939422369003296, + "loss_ce": 0.0011199985165148973, + "loss_iou": 0.41015625, + "loss_num": 0.0147705078125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 603376368, + "step": 4777 + }, + { + "epoch": 1.2256781889309305, + "grad_norm": 52.95048141479492, + "learning_rate": 5e-06, + "loss": 0.8948, + "num_input_tokens_seen": 603502644, + "step": 4778 + }, + { + "epoch": 1.2256781889309305, + "loss": 0.8711966276168823, + "loss_ce": 0.00010290517820976675, + "loss_iou": 0.412109375, + "loss_num": 0.009033203125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 603502644, + "step": 4778 + }, + { + "epoch": 1.225934714294876, + "grad_norm": 35.8829460144043, + "learning_rate": 5e-06, + "loss": 0.9167, + "num_input_tokens_seen": 603627316, + "step": 4779 + }, + { + "epoch": 1.225934714294876, + "loss": 0.9704164862632751, + "loss_ce": 0.00020169885829091072, + "loss_iou": 0.44140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 603627316, + "step": 4779 + }, + { + "epoch": 1.2261912396588213, + "grad_norm": 25.7741756439209, + "learning_rate": 5e-06, + "loss": 0.7656, + "num_input_tokens_seen": 603753852, + "step": 4780 + }, + { + "epoch": 1.2261912396588213, + "loss": 0.686504065990448, + "loss_ce": 0.0002247951051685959, + "loss_iou": 0.3203125, + "loss_num": 0.00897216796875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 603753852, + "step": 4780 + }, + { + "epoch": 1.2264477650227665, + "grad_norm": 40.98419189453125, + "learning_rate": 5e-06, + "loss": 0.9614, + "num_input_tokens_seen": 603879504, + "step": 4781 + }, + { + "epoch": 1.2264477650227665, + "loss": 1.0105911493301392, + "loss_ce": 0.001802072860300541, + "loss_iou": 0.451171875, + "loss_num": 0.0211181640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 603879504, + "step": 4781 + }, + { + "epoch": 1.226704290386712, + "grad_norm": 55.55708312988281, + "learning_rate": 5e-06, + "loss": 0.8129, + "num_input_tokens_seen": 604005292, + "step": 4782 + }, + { + "epoch": 1.226704290386712, + "loss": 0.8160561919212341, + "loss_ce": 0.0006265242118388414, + "loss_iou": 0.38671875, + "loss_num": 0.0084228515625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 604005292, + "step": 4782 + }, + { + "epoch": 1.2269608157506573, + "grad_norm": 51.166046142578125, + "learning_rate": 5e-06, + "loss": 0.9448, + "num_input_tokens_seen": 604131444, + "step": 4783 + }, + { + "epoch": 1.2269608157506573, + "loss": 0.828851580619812, + "loss_ce": 0.0014589702477678657, + "loss_iou": 0.376953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 604131444, + "step": 4783 + }, + { + "epoch": 1.2272173411146028, + "grad_norm": 38.290218353271484, + "learning_rate": 5e-06, + "loss": 0.959, + "num_input_tokens_seen": 604254772, + "step": 4784 + }, + { + "epoch": 1.2272173411146028, + "loss": 1.047408938407898, + "loss_ce": 0.00248708832077682, + "loss_iou": 0.46875, + "loss_num": 0.021484375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 604254772, + "step": 4784 + }, + { + "epoch": 1.227473866478548, + "grad_norm": 49.29533767700195, + "learning_rate": 5e-06, + "loss": 0.9122, + "num_input_tokens_seen": 604380968, + "step": 4785 + }, + { + "epoch": 1.227473866478548, + "loss": 1.0329957008361816, + "loss_ce": 0.000769149512052536, + "loss_iou": 0.4765625, + "loss_num": 0.0157470703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 604380968, + "step": 4785 + }, + { + "epoch": 1.2277303918424933, + "grad_norm": 55.00996017456055, + "learning_rate": 5e-06, + "loss": 0.8781, + "num_input_tokens_seen": 604506804, + "step": 4786 + }, + { + "epoch": 1.2277303918424933, + "loss": 0.8428975939750671, + "loss_ce": 0.0006124571664258838, + "loss_iou": 0.392578125, + "loss_num": 0.01153564453125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 604506804, + "step": 4786 + }, + { + "epoch": 1.2279869172064388, + "grad_norm": 49.223751068115234, + "learning_rate": 5e-06, + "loss": 0.8161, + "num_input_tokens_seen": 604632964, + "step": 4787 + }, + { + "epoch": 1.2279869172064388, + "loss": 0.8660851716995239, + "loss_ce": 0.0013390433741733432, + "loss_iou": 0.40234375, + "loss_num": 0.012451171875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 604632964, + "step": 4787 + }, + { + "epoch": 1.228243442570384, + "grad_norm": 59.02398681640625, + "learning_rate": 5e-06, + "loss": 0.7592, + "num_input_tokens_seen": 604760136, + "step": 4788 + }, + { + "epoch": 1.228243442570384, + "loss": 0.6717700362205505, + "loss_ce": 0.0020923118572682142, + "loss_iou": 0.3125, + "loss_num": 0.0093994140625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 604760136, + "step": 4788 + }, + { + "epoch": 1.2284999679343296, + "grad_norm": 37.43214797973633, + "learning_rate": 5e-06, + "loss": 1.0007, + "num_input_tokens_seen": 604886588, + "step": 4789 + }, + { + "epoch": 1.2284999679343296, + "loss": 1.0299526453018188, + "loss_ce": 0.0016322660958394408, + "loss_iou": 0.478515625, + "loss_num": 0.01434326171875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 604886588, + "step": 4789 + }, + { + "epoch": 1.2287564932982749, + "grad_norm": 19.44113540649414, + "learning_rate": 5e-06, + "loss": 0.8509, + "num_input_tokens_seen": 605011564, + "step": 4790 + }, + { + "epoch": 1.2287564932982749, + "loss": 0.8398631811141968, + "loss_ce": 0.0029490659944713116, + "loss_iou": 0.376953125, + "loss_num": 0.0166015625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 605011564, + "step": 4790 + }, + { + "epoch": 1.2290130186622203, + "grad_norm": 22.47450065612793, + "learning_rate": 5e-06, + "loss": 0.7659, + "num_input_tokens_seen": 605137856, + "step": 4791 + }, + { + "epoch": 1.2290130186622203, + "loss": 0.7634186744689941, + "loss_ce": 0.00023511916515417397, + "loss_iou": 0.359375, + "loss_num": 0.00860595703125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 605137856, + "step": 4791 + }, + { + "epoch": 1.2292695440261656, + "grad_norm": 41.92300796508789, + "learning_rate": 5e-06, + "loss": 0.9322, + "num_input_tokens_seen": 605262204, + "step": 4792 + }, + { + "epoch": 1.2292695440261656, + "loss": 1.039194107055664, + "loss_ce": 0.0015964285703375936, + "loss_iou": 0.458984375, + "loss_num": 0.02392578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 605262204, + "step": 4792 + }, + { + "epoch": 1.2295260693901109, + "grad_norm": 39.05963897705078, + "learning_rate": 5e-06, + "loss": 0.8749, + "num_input_tokens_seen": 605388516, + "step": 4793 + }, + { + "epoch": 1.2295260693901109, + "loss": 0.8081142902374268, + "loss_ce": 0.000985379796475172, + "loss_iou": 0.373046875, + "loss_num": 0.01239013671875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 605388516, + "step": 4793 + }, + { + "epoch": 1.2297825947540564, + "grad_norm": 162.90028381347656, + "learning_rate": 5e-06, + "loss": 0.9109, + "num_input_tokens_seen": 605514728, + "step": 4794 + }, + { + "epoch": 1.2297825947540564, + "loss": 0.79775071144104, + "loss_ce": 0.00014325222582556307, + "loss_iou": 0.365234375, + "loss_num": 0.0130615234375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 605514728, + "step": 4794 + }, + { + "epoch": 1.2300391201180016, + "grad_norm": 37.66619873046875, + "learning_rate": 5e-06, + "loss": 0.7623, + "num_input_tokens_seen": 605640300, + "step": 4795 + }, + { + "epoch": 1.2300391201180016, + "loss": 0.7027794122695923, + "loss_ce": 0.0001426686649210751, + "loss_iou": 0.337890625, + "loss_num": 0.005218505859375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 605640300, + "step": 4795 + }, + { + "epoch": 1.230295645481947, + "grad_norm": 38.53131103515625, + "learning_rate": 5e-06, + "loss": 0.8167, + "num_input_tokens_seen": 605766236, + "step": 4796 + }, + { + "epoch": 1.230295645481947, + "loss": 0.9339948892593384, + "loss_ce": 0.00040114152943715453, + "loss_iou": 0.42578125, + "loss_num": 0.0166015625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 605766236, + "step": 4796 + }, + { + "epoch": 1.2305521708458924, + "grad_norm": 40.22769546508789, + "learning_rate": 5e-06, + "loss": 0.8585, + "num_input_tokens_seen": 605893152, + "step": 4797 + }, + { + "epoch": 1.2305521708458924, + "loss": 1.1277151107788086, + "loss_ce": 0.0012503860052675009, + "loss_iou": 0.5234375, + "loss_num": 0.0152587890625, + "loss_xval": 1.125, + "num_input_tokens_seen": 605893152, + "step": 4797 + }, + { + "epoch": 1.2308086962098377, + "grad_norm": 33.27394104003906, + "learning_rate": 5e-06, + "loss": 0.8316, + "num_input_tokens_seen": 606020620, + "step": 4798 + }, + { + "epoch": 1.2308086962098377, + "loss": 0.7004101276397705, + "loss_ce": 0.00045895882067270577, + "loss_iou": 0.330078125, + "loss_num": 0.00811767578125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 606020620, + "step": 4798 + }, + { + "epoch": 1.2310652215737832, + "grad_norm": 28.13202667236328, + "learning_rate": 5e-06, + "loss": 0.9139, + "num_input_tokens_seen": 606147068, + "step": 4799 + }, + { + "epoch": 1.2310652215737832, + "loss": 0.7489627599716187, + "loss_ce": 0.0004276205145288259, + "loss_iou": 0.3515625, + "loss_num": 0.0091552734375, + "loss_xval": 0.75, + "num_input_tokens_seen": 606147068, + "step": 4799 + }, + { + "epoch": 1.2313217469377284, + "grad_norm": 37.590694427490234, + "learning_rate": 5e-06, + "loss": 0.8537, + "num_input_tokens_seen": 606274148, + "step": 4800 + }, + { + "epoch": 1.2313217469377284, + "loss": 0.9435932040214539, + "loss_ce": 0.00023384805535897613, + "loss_iou": 0.4453125, + "loss_num": 0.0108642578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 606274148, + "step": 4800 + }, + { + "epoch": 1.231578272301674, + "grad_norm": 26.834115982055664, + "learning_rate": 5e-06, + "loss": 0.8367, + "num_input_tokens_seen": 606400028, + "step": 4801 + }, + { + "epoch": 1.231578272301674, + "loss": 0.6948038935661316, + "loss_ce": 0.0002238238521385938, + "loss_iou": 0.33203125, + "loss_num": 0.006011962890625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 606400028, + "step": 4801 + }, + { + "epoch": 1.2318347976656192, + "grad_norm": 26.551607131958008, + "learning_rate": 5e-06, + "loss": 0.8873, + "num_input_tokens_seen": 606526088, + "step": 4802 + }, + { + "epoch": 1.2318347976656192, + "loss": 0.8339190483093262, + "loss_ce": 0.0021320083178579807, + "loss_iou": 0.37890625, + "loss_num": 0.0152587890625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 606526088, + "step": 4802 + }, + { + "epoch": 1.2320913230295645, + "grad_norm": 30.444360733032227, + "learning_rate": 5e-06, + "loss": 0.7898, + "num_input_tokens_seen": 606652276, + "step": 4803 + }, + { + "epoch": 1.2320913230295645, + "loss": 0.9319490194320679, + "loss_ce": 0.0005525393644347787, + "loss_iou": 0.4375, + "loss_num": 0.0115966796875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 606652276, + "step": 4803 + }, + { + "epoch": 1.23234784839351, + "grad_norm": 43.92644500732422, + "learning_rate": 5e-06, + "loss": 0.926, + "num_input_tokens_seen": 606777968, + "step": 4804 + }, + { + "epoch": 1.23234784839351, + "loss": 0.7606326341629028, + "loss_ce": 0.0018436069367453456, + "loss_iou": 0.357421875, + "loss_num": 0.008544921875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 606777968, + "step": 4804 + }, + { + "epoch": 1.2326043737574552, + "grad_norm": 25.221158981323242, + "learning_rate": 5e-06, + "loss": 0.8324, + "num_input_tokens_seen": 606902516, + "step": 4805 + }, + { + "epoch": 1.2326043737574552, + "loss": 0.8995772004127502, + "loss_ce": 0.002360400278121233, + "loss_iou": 0.4140625, + "loss_num": 0.01422119140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 606902516, + "step": 4805 + }, + { + "epoch": 1.2328608991214005, + "grad_norm": 14.783366203308105, + "learning_rate": 5e-06, + "loss": 0.7519, + "num_input_tokens_seen": 607028760, + "step": 4806 + }, + { + "epoch": 1.2328608991214005, + "loss": 0.7393022775650024, + "loss_ce": 4.4474872993305326e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00738525390625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 607028760, + "step": 4806 + }, + { + "epoch": 1.233117424485346, + "grad_norm": 148.4822540283203, + "learning_rate": 5e-06, + "loss": 0.8242, + "num_input_tokens_seen": 607154808, + "step": 4807 + }, + { + "epoch": 1.233117424485346, + "loss": 0.8717177510261536, + "loss_ce": 0.00013574768672697246, + "loss_iou": 0.392578125, + "loss_num": 0.01708984375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 607154808, + "step": 4807 + }, + { + "epoch": 1.2333739498492913, + "grad_norm": 35.75702667236328, + "learning_rate": 5e-06, + "loss": 0.8612, + "num_input_tokens_seen": 607281572, + "step": 4808 + }, + { + "epoch": 1.2333739498492913, + "loss": 0.7554574012756348, + "loss_ce": 0.0005746155511587858, + "loss_iou": 0.34765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 607281572, + "step": 4808 + }, + { + "epoch": 1.2336304752132368, + "grad_norm": 32.40003204345703, + "learning_rate": 5e-06, + "loss": 0.8833, + "num_input_tokens_seen": 607407424, + "step": 4809 + }, + { + "epoch": 1.2336304752132368, + "loss": 1.0377521514892578, + "loss_ce": 0.008699383586645126, + "loss_iou": 0.48046875, + "loss_num": 0.01348876953125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 607407424, + "step": 4809 + }, + { + "epoch": 1.233887000577182, + "grad_norm": 46.07844543457031, + "learning_rate": 5e-06, + "loss": 0.837, + "num_input_tokens_seen": 607533704, + "step": 4810 + }, + { + "epoch": 1.233887000577182, + "loss": 1.2487030029296875, + "loss_ce": 0.0001677718828432262, + "loss_iou": 0.55078125, + "loss_num": 0.030029296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 607533704, + "step": 4810 + }, + { + "epoch": 1.2341435259411275, + "grad_norm": 44.47356414794922, + "learning_rate": 5e-06, + "loss": 0.8803, + "num_input_tokens_seen": 607659840, + "step": 4811 + }, + { + "epoch": 1.2341435259411275, + "loss": 0.8940377235412598, + "loss_ce": 0.0007271752692759037, + "loss_iou": 0.40625, + "loss_num": 0.0164794921875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 607659840, + "step": 4811 + }, + { + "epoch": 1.2344000513050728, + "grad_norm": 42.178070068359375, + "learning_rate": 5e-06, + "loss": 0.8101, + "num_input_tokens_seen": 607787148, + "step": 4812 + }, + { + "epoch": 1.2344000513050728, + "loss": 0.8719956874847412, + "loss_ce": 0.0018784594722092152, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 607787148, + "step": 4812 + }, + { + "epoch": 1.234656576669018, + "grad_norm": 47.128814697265625, + "learning_rate": 5e-06, + "loss": 0.8365, + "num_input_tokens_seen": 607913340, + "step": 4813 + }, + { + "epoch": 1.234656576669018, + "loss": 0.6522765159606934, + "loss_ce": 0.0004210706101730466, + "loss_iou": 0.3125, + "loss_num": 0.00531005859375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 607913340, + "step": 4813 + }, + { + "epoch": 1.2349131020329636, + "grad_norm": 37.64466094970703, + "learning_rate": 5e-06, + "loss": 0.8265, + "num_input_tokens_seen": 608039124, + "step": 4814 + }, + { + "epoch": 1.2349131020329636, + "loss": 0.8613240718841553, + "loss_ce": 0.00048426154535263777, + "loss_iou": 0.40234375, + "loss_num": 0.01116943359375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 608039124, + "step": 4814 + }, + { + "epoch": 1.2351696273969088, + "grad_norm": 33.64643096923828, + "learning_rate": 5e-06, + "loss": 0.8477, + "num_input_tokens_seen": 608166200, + "step": 4815 + }, + { + "epoch": 1.2351696273969088, + "loss": 0.8707019090652466, + "loss_ce": 0.0003406030300538987, + "loss_iou": 0.38671875, + "loss_num": 0.019287109375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 608166200, + "step": 4815 + }, + { + "epoch": 1.2354261527608543, + "grad_norm": 34.57878494262695, + "learning_rate": 5e-06, + "loss": 0.8019, + "num_input_tokens_seen": 608293192, + "step": 4816 + }, + { + "epoch": 1.2354261527608543, + "loss": 0.7439565658569336, + "loss_ce": 6.011620280332863e-05, + "loss_iou": 0.349609375, + "loss_num": 0.00946044921875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 608293192, + "step": 4816 + }, + { + "epoch": 1.2356826781247996, + "grad_norm": 37.146244049072266, + "learning_rate": 5e-06, + "loss": 0.9266, + "num_input_tokens_seen": 608419644, + "step": 4817 + }, + { + "epoch": 1.2356826781247996, + "loss": 0.9923871755599976, + "loss_ce": 0.0014203899772837758, + "loss_iou": 0.451171875, + "loss_num": 0.017822265625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 608419644, + "step": 4817 + }, + { + "epoch": 1.2359392034887449, + "grad_norm": 32.34402084350586, + "learning_rate": 5e-06, + "loss": 0.7883, + "num_input_tokens_seen": 608546708, + "step": 4818 + }, + { + "epoch": 1.2359392034887449, + "loss": 0.6896160840988159, + "loss_ce": 0.0001629363396205008, + "loss_iou": 0.333984375, + "loss_num": 0.00439453125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 608546708, + "step": 4818 + }, + { + "epoch": 1.2361957288526904, + "grad_norm": 42.71183776855469, + "learning_rate": 5e-06, + "loss": 0.8965, + "num_input_tokens_seen": 608672416, + "step": 4819 + }, + { + "epoch": 1.2361957288526904, + "loss": 1.1670942306518555, + "loss_ce": 0.002055141143500805, + "loss_iou": 0.5078125, + "loss_num": 0.0303955078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 608672416, + "step": 4819 + }, + { + "epoch": 1.2364522542166356, + "grad_norm": 50.39683532714844, + "learning_rate": 5e-06, + "loss": 0.8926, + "num_input_tokens_seen": 608798828, + "step": 4820 + }, + { + "epoch": 1.2364522542166356, + "loss": 0.9353948831558228, + "loss_ce": 0.0010687229223549366, + "loss_iou": 0.427734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 608798828, + "step": 4820 + }, + { + "epoch": 1.2367087795805811, + "grad_norm": 44.45790481567383, + "learning_rate": 5e-06, + "loss": 0.857, + "num_input_tokens_seen": 608924084, + "step": 4821 + }, + { + "epoch": 1.2367087795805811, + "loss": 0.7591259479522705, + "loss_ce": 0.00033689866540953517, + "loss_iou": 0.3671875, + "loss_num": 0.00494384765625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 608924084, + "step": 4821 + }, + { + "epoch": 1.2369653049445264, + "grad_norm": 40.548561096191406, + "learning_rate": 5e-06, + "loss": 0.8512, + "num_input_tokens_seen": 609050740, + "step": 4822 + }, + { + "epoch": 1.2369653049445264, + "loss": 0.7823129892349243, + "loss_ce": 0.0005746952374465764, + "loss_iou": 0.36328125, + "loss_num": 0.0111083984375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 609050740, + "step": 4822 + }, + { + "epoch": 1.2372218303084717, + "grad_norm": 46.11857986450195, + "learning_rate": 5e-06, + "loss": 0.8331, + "num_input_tokens_seen": 609176604, + "step": 4823 + }, + { + "epoch": 1.2372218303084717, + "loss": 0.8695831298828125, + "loss_ce": 0.0009307708824053407, + "loss_iou": 0.412109375, + "loss_num": 0.009033203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 609176604, + "step": 4823 + }, + { + "epoch": 1.2374783556724172, + "grad_norm": 55.450401306152344, + "learning_rate": 5e-06, + "loss": 0.8709, + "num_input_tokens_seen": 609302436, + "step": 4824 + }, + { + "epoch": 1.2374783556724172, + "loss": 0.8968358039855957, + "loss_ce": 0.004990077577531338, + "loss_iou": 0.396484375, + "loss_num": 0.01953125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 609302436, + "step": 4824 + }, + { + "epoch": 1.2377348810363624, + "grad_norm": 52.03253173828125, + "learning_rate": 5e-06, + "loss": 0.8546, + "num_input_tokens_seen": 609429244, + "step": 4825 + }, + { + "epoch": 1.2377348810363624, + "loss": 0.8602542877197266, + "loss_ce": 0.00014686226495541632, + "loss_iou": 0.40234375, + "loss_num": 0.01092529296875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 609429244, + "step": 4825 + }, + { + "epoch": 1.237991406400308, + "grad_norm": 36.153167724609375, + "learning_rate": 5e-06, + "loss": 0.8551, + "num_input_tokens_seen": 609555540, + "step": 4826 + }, + { + "epoch": 1.237991406400308, + "loss": 0.912063479423523, + "loss_ce": 0.0021513879764825106, + "loss_iou": 0.41796875, + "loss_num": 0.01470947265625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 609555540, + "step": 4826 + }, + { + "epoch": 1.2382479317642532, + "grad_norm": 37.33576965332031, + "learning_rate": 5e-06, + "loss": 1.008, + "num_input_tokens_seen": 609681316, + "step": 4827 + }, + { + "epoch": 1.2382479317642532, + "loss": 1.1826121807098389, + "loss_ce": 0.0004832570266444236, + "loss_iou": 0.53515625, + "loss_num": 0.02294921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 609681316, + "step": 4827 + }, + { + "epoch": 1.2385044571281985, + "grad_norm": 54.23225402832031, + "learning_rate": 5e-06, + "loss": 0.8817, + "num_input_tokens_seen": 609807580, + "step": 4828 + }, + { + "epoch": 1.2385044571281985, + "loss": 0.7838513851165771, + "loss_ce": 0.001136545673944056, + "loss_iou": 0.3671875, + "loss_num": 0.00927734375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 609807580, + "step": 4828 + }, + { + "epoch": 1.238760982492144, + "grad_norm": 49.90633773803711, + "learning_rate": 5e-06, + "loss": 0.9805, + "num_input_tokens_seen": 609934092, + "step": 4829 + }, + { + "epoch": 1.238760982492144, + "loss": 0.8007910251617432, + "loss_ce": 0.001230464898981154, + "loss_iou": 0.375, + "loss_num": 0.01019287109375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 609934092, + "step": 4829 + }, + { + "epoch": 1.2390175078560892, + "grad_norm": 31.10322380065918, + "learning_rate": 5e-06, + "loss": 0.9372, + "num_input_tokens_seen": 610059916, + "step": 4830 + }, + { + "epoch": 1.2390175078560892, + "loss": 1.1238504648208618, + "loss_ce": 0.0017802028451114893, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 610059916, + "step": 4830 + }, + { + "epoch": 1.2392740332200347, + "grad_norm": 26.670007705688477, + "learning_rate": 5e-06, + "loss": 0.9373, + "num_input_tokens_seen": 610185820, + "step": 4831 + }, + { + "epoch": 1.2392740332200347, + "loss": 0.888588547706604, + "loss_ce": 0.00016078323824331164, + "loss_iou": 0.396484375, + "loss_num": 0.0194091796875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 610185820, + "step": 4831 + }, + { + "epoch": 1.23953055858398, + "grad_norm": 24.779130935668945, + "learning_rate": 5e-06, + "loss": 0.9063, + "num_input_tokens_seen": 610311188, + "step": 4832 + }, + { + "epoch": 1.23953055858398, + "loss": 0.9834119081497192, + "loss_ce": 0.007825959473848343, + "loss_iou": 0.439453125, + "loss_num": 0.01953125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 610311188, + "step": 4832 + }, + { + "epoch": 1.2397870839479253, + "grad_norm": 39.526580810546875, + "learning_rate": 5e-06, + "loss": 0.7785, + "num_input_tokens_seen": 610439012, + "step": 4833 + }, + { + "epoch": 1.2397870839479253, + "loss": 0.7931700944900513, + "loss_ce": 0.0004455468151718378, + "loss_iou": 0.380859375, + "loss_num": 0.006103515625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 610439012, + "step": 4833 + }, + { + "epoch": 1.2400436093118707, + "grad_norm": 49.415714263916016, + "learning_rate": 5e-06, + "loss": 0.9307, + "num_input_tokens_seen": 610565996, + "step": 4834 + }, + { + "epoch": 1.2400436093118707, + "loss": 0.9404745101928711, + "loss_ce": 0.0010214148787781596, + "loss_iou": 0.439453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 610565996, + "step": 4834 + }, + { + "epoch": 1.240300134675816, + "grad_norm": 39.927276611328125, + "learning_rate": 5e-06, + "loss": 0.8959, + "num_input_tokens_seen": 610692480, + "step": 4835 + }, + { + "epoch": 1.240300134675816, + "loss": 0.9145958423614502, + "loss_ce": 0.003463053610175848, + "loss_iou": 0.42578125, + "loss_num": 0.01165771484375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 610692480, + "step": 4835 + }, + { + "epoch": 1.2405566600397615, + "grad_norm": 51.902427673339844, + "learning_rate": 5e-06, + "loss": 0.852, + "num_input_tokens_seen": 610818716, + "step": 4836 + }, + { + "epoch": 1.2405566600397615, + "loss": 0.7459242343902588, + "loss_ce": 7.462648500222713e-05, + "loss_iou": 0.35546875, + "loss_num": 0.007293701171875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 610818716, + "step": 4836 + }, + { + "epoch": 1.2408131854037068, + "grad_norm": 52.84894561767578, + "learning_rate": 5e-06, + "loss": 0.8721, + "num_input_tokens_seen": 610943460, + "step": 4837 + }, + { + "epoch": 1.2408131854037068, + "loss": 0.8643629550933838, + "loss_ce": 0.00010515828034840524, + "loss_iou": 0.404296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 610943460, + "step": 4837 + }, + { + "epoch": 1.2410697107676523, + "grad_norm": 53.94810104370117, + "learning_rate": 5e-06, + "loss": 0.833, + "num_input_tokens_seen": 611070192, + "step": 4838 + }, + { + "epoch": 1.2410697107676523, + "loss": 0.9315834641456604, + "loss_ce": 0.0001870039850473404, + "loss_iou": 0.431640625, + "loss_num": 0.013427734375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 611070192, + "step": 4838 + }, + { + "epoch": 1.2413262361315975, + "grad_norm": 54.83344650268555, + "learning_rate": 5e-06, + "loss": 0.948, + "num_input_tokens_seen": 611196436, + "step": 4839 + }, + { + "epoch": 1.2413262361315975, + "loss": 1.3058286905288696, + "loss_ce": 0.0030942833982408047, + "loss_iou": 0.5625, + "loss_num": 0.035400390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 611196436, + "step": 4839 + }, + { + "epoch": 1.2415827614955428, + "grad_norm": 43.033504486083984, + "learning_rate": 5e-06, + "loss": 0.7495, + "num_input_tokens_seen": 611322388, + "step": 4840 + }, + { + "epoch": 1.2415827614955428, + "loss": 0.6101521253585815, + "loss_ce": 0.0002888529561460018, + "loss_iou": 0.29296875, + "loss_num": 0.004486083984375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 611322388, + "step": 4840 + }, + { + "epoch": 1.2418392868594883, + "grad_norm": 81.3130111694336, + "learning_rate": 5e-06, + "loss": 1.0601, + "num_input_tokens_seen": 611448556, + "step": 4841 + }, + { + "epoch": 1.2418392868594883, + "loss": 1.055915355682373, + "loss_ce": 0.0026925706770271063, + "loss_iou": 0.478515625, + "loss_num": 0.019287109375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 611448556, + "step": 4841 + }, + { + "epoch": 1.2420958122234336, + "grad_norm": 41.133548736572266, + "learning_rate": 5e-06, + "loss": 0.7095, + "num_input_tokens_seen": 611574700, + "step": 4842 + }, + { + "epoch": 1.2420958122234336, + "loss": 0.7274676561355591, + "loss_ce": 0.0009051474626176059, + "loss_iou": 0.345703125, + "loss_num": 0.0068359375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 611574700, + "step": 4842 + }, + { + "epoch": 1.2423523375873788, + "grad_norm": 35.57295227050781, + "learning_rate": 5e-06, + "loss": 0.853, + "num_input_tokens_seen": 611700180, + "step": 4843 + }, + { + "epoch": 1.2423523375873788, + "loss": 1.0150408744812012, + "loss_ce": 0.0008806661935523152, + "loss_iou": 0.46484375, + "loss_num": 0.016845703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 611700180, + "step": 4843 + }, + { + "epoch": 1.2426088629513243, + "grad_norm": 44.566001892089844, + "learning_rate": 5e-06, + "loss": 0.7506, + "num_input_tokens_seen": 611826520, + "step": 4844 + }, + { + "epoch": 1.2426088629513243, + "loss": 0.8877298831939697, + "loss_ce": 0.0005229068920016289, + "loss_iou": 0.400390625, + "loss_num": 0.017578125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 611826520, + "step": 4844 + }, + { + "epoch": 1.2428653883152696, + "grad_norm": 40.0380973815918, + "learning_rate": 5e-06, + "loss": 0.9387, + "num_input_tokens_seen": 611953436, + "step": 4845 + }, + { + "epoch": 1.2428653883152696, + "loss": 1.0390417575836182, + "loss_ce": 0.001199960126541555, + "loss_iou": 0.47265625, + "loss_num": 0.018798828125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 611953436, + "step": 4845 + }, + { + "epoch": 1.243121913679215, + "grad_norm": 22.591411590576172, + "learning_rate": 5e-06, + "loss": 0.7958, + "num_input_tokens_seen": 612080148, + "step": 4846 + }, + { + "epoch": 1.243121913679215, + "loss": 0.6195557117462158, + "loss_ce": 0.0001709434436634183, + "loss_iou": 0.294921875, + "loss_num": 0.005767822265625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 612080148, + "step": 4846 + }, + { + "epoch": 1.2433784390431604, + "grad_norm": 32.35323715209961, + "learning_rate": 5e-06, + "loss": 0.9023, + "num_input_tokens_seen": 612206576, + "step": 4847 + }, + { + "epoch": 1.2433784390431604, + "loss": 0.6743068099021912, + "loss_ce": 0.000722826924175024, + "loss_iou": 0.318359375, + "loss_num": 0.0074462890625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 612206576, + "step": 4847 + }, + { + "epoch": 1.2436349644071059, + "grad_norm": 36.899967193603516, + "learning_rate": 5e-06, + "loss": 0.8852, + "num_input_tokens_seen": 612333696, + "step": 4848 + }, + { + "epoch": 1.2436349644071059, + "loss": 0.9531025290489197, + "loss_ce": 0.0004658221441786736, + "loss_iou": 0.43359375, + "loss_num": 0.017333984375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 612333696, + "step": 4848 + }, + { + "epoch": 1.2438914897710511, + "grad_norm": 35.737945556640625, + "learning_rate": 5e-06, + "loss": 0.7892, + "num_input_tokens_seen": 612459852, + "step": 4849 + }, + { + "epoch": 1.2438914897710511, + "loss": 0.7735589742660522, + "loss_ce": 0.00012150005204603076, + "loss_iou": 0.361328125, + "loss_num": 0.00982666015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 612459852, + "step": 4849 + }, + { + "epoch": 1.2441480151349964, + "grad_norm": 65.22116088867188, + "learning_rate": 5e-06, + "loss": 0.8864, + "num_input_tokens_seen": 612586224, + "step": 4850 + }, + { + "epoch": 1.2441480151349964, + "loss": 0.6678242087364197, + "loss_ce": 9.959124872693792e-05, + "loss_iou": 0.322265625, + "loss_num": 0.004669189453125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 612586224, + "step": 4850 + }, + { + "epoch": 1.244404540498942, + "grad_norm": 51.36000061035156, + "learning_rate": 5e-06, + "loss": 0.8881, + "num_input_tokens_seen": 612712628, + "step": 4851 + }, + { + "epoch": 1.244404540498942, + "loss": 0.9431815147399902, + "loss_ce": 0.00031036691507324576, + "loss_iou": 0.4296875, + "loss_num": 0.0167236328125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 612712628, + "step": 4851 + }, + { + "epoch": 1.2446610658628872, + "grad_norm": 39.94193649291992, + "learning_rate": 5e-06, + "loss": 0.7954, + "num_input_tokens_seen": 612839016, + "step": 4852 + }, + { + "epoch": 1.2446610658628872, + "loss": 0.7878487706184387, + "loss_ce": 0.0014718325110152364, + "loss_iou": 0.3671875, + "loss_num": 0.010498046875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 612839016, + "step": 4852 + }, + { + "epoch": 1.2449175912268324, + "grad_norm": 39.26067352294922, + "learning_rate": 5e-06, + "loss": 0.7526, + "num_input_tokens_seen": 612965524, + "step": 4853 + }, + { + "epoch": 1.2449175912268324, + "loss": 0.732310950756073, + "loss_ce": 0.002574614016339183, + "loss_iou": 0.33984375, + "loss_num": 0.00970458984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 612965524, + "step": 4853 + }, + { + "epoch": 1.245174116590778, + "grad_norm": 47.21491241455078, + "learning_rate": 5e-06, + "loss": 0.8296, + "num_input_tokens_seen": 613091732, + "step": 4854 + }, + { + "epoch": 1.245174116590778, + "loss": 0.9390318989753723, + "loss_ce": 0.0029967008158564568, + "loss_iou": 0.4140625, + "loss_num": 0.0216064453125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 613091732, + "step": 4854 + }, + { + "epoch": 1.2454306419547232, + "grad_norm": 40.65690994262695, + "learning_rate": 5e-06, + "loss": 0.9531, + "num_input_tokens_seen": 613217776, + "step": 4855 + }, + { + "epoch": 1.2454306419547232, + "loss": 1.0387933254241943, + "loss_ce": 0.0026605348102748394, + "loss_iou": 0.45703125, + "loss_num": 0.0244140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 613217776, + "step": 4855 + }, + { + "epoch": 1.2456871673186687, + "grad_norm": 37.79996871948242, + "learning_rate": 5e-06, + "loss": 0.6888, + "num_input_tokens_seen": 613344016, + "step": 4856 + }, + { + "epoch": 1.2456871673186687, + "loss": 0.6156086921691895, + "loss_ce": 0.00013015670992899686, + "loss_iou": 0.296875, + "loss_num": 0.004638671875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 613344016, + "step": 4856 + }, + { + "epoch": 1.245943692682614, + "grad_norm": 36.82427978515625, + "learning_rate": 5e-06, + "loss": 0.8428, + "num_input_tokens_seen": 613469564, + "step": 4857 + }, + { + "epoch": 1.245943692682614, + "loss": 0.8867343068122864, + "loss_ce": 0.0007479985943064094, + "loss_iou": 0.40625, + "loss_num": 0.01446533203125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 613469564, + "step": 4857 + }, + { + "epoch": 1.2462002180465594, + "grad_norm": 37.523399353027344, + "learning_rate": 5e-06, + "loss": 0.8933, + "num_input_tokens_seen": 613595672, + "step": 4858 + }, + { + "epoch": 1.2462002180465594, + "loss": 0.9049521684646606, + "loss_ce": 0.00016698756371624768, + "loss_iou": 0.423828125, + "loss_num": 0.01104736328125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 613595672, + "step": 4858 + }, + { + "epoch": 1.2464567434105047, + "grad_norm": 42.28435516357422, + "learning_rate": 5e-06, + "loss": 0.864, + "num_input_tokens_seen": 613722532, + "step": 4859 + }, + { + "epoch": 1.2464567434105047, + "loss": 0.9065111875534058, + "loss_ce": 0.0022143484093248844, + "loss_iou": 0.423828125, + "loss_num": 0.011474609375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 613722532, + "step": 4859 + }, + { + "epoch": 1.24671326877445, + "grad_norm": 41.45710372924805, + "learning_rate": 5e-06, + "loss": 0.9046, + "num_input_tokens_seen": 613848048, + "step": 4860 + }, + { + "epoch": 1.24671326877445, + "loss": 0.9670527577400208, + "loss_ce": 0.0031855073757469654, + "loss_iou": 0.421875, + "loss_num": 0.0238037109375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 613848048, + "step": 4860 + }, + { + "epoch": 1.2469697941383955, + "grad_norm": 41.393699645996094, + "learning_rate": 5e-06, + "loss": 0.9754, + "num_input_tokens_seen": 613974360, + "step": 4861 + }, + { + "epoch": 1.2469697941383955, + "loss": 0.8590275645256042, + "loss_ce": 0.0003849620115943253, + "loss_iou": 0.400390625, + "loss_num": 0.01153564453125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 613974360, + "step": 4861 + }, + { + "epoch": 1.2472263195023408, + "grad_norm": 46.90896224975586, + "learning_rate": 5e-06, + "loss": 0.8434, + "num_input_tokens_seen": 614100904, + "step": 4862 + }, + { + "epoch": 1.2472263195023408, + "loss": 0.8090881109237671, + "loss_ce": 0.0007385116186924279, + "loss_iou": 0.376953125, + "loss_num": 0.01080322265625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 614100904, + "step": 4862 + }, + { + "epoch": 1.2474828448662862, + "grad_norm": 131.56016540527344, + "learning_rate": 5e-06, + "loss": 0.8431, + "num_input_tokens_seen": 614227544, + "step": 4863 + }, + { + "epoch": 1.2474828448662862, + "loss": 0.7408370971679688, + "loss_ce": 0.00011441647075116634, + "loss_iou": 0.34765625, + "loss_num": 0.00933837890625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 614227544, + "step": 4863 + }, + { + "epoch": 1.2477393702302315, + "grad_norm": 44.23210906982422, + "learning_rate": 5e-06, + "loss": 0.7914, + "num_input_tokens_seen": 614354336, + "step": 4864 + }, + { + "epoch": 1.2477393702302315, + "loss": 0.934173583984375, + "loss_ce": 9.159051114693284e-05, + "loss_iou": 0.4375, + "loss_num": 0.011474609375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 614354336, + "step": 4864 + }, + { + "epoch": 1.2479958955941768, + "grad_norm": 42.01106262207031, + "learning_rate": 5e-06, + "loss": 0.8108, + "num_input_tokens_seen": 614479916, + "step": 4865 + }, + { + "epoch": 1.2479958955941768, + "loss": 0.8208638429641724, + "loss_ce": 0.0010396565776318312, + "loss_iou": 0.38671875, + "loss_num": 0.00970458984375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 614479916, + "step": 4865 + }, + { + "epoch": 1.2482524209581223, + "grad_norm": 35.997737884521484, + "learning_rate": 5e-06, + "loss": 0.7723, + "num_input_tokens_seen": 614605824, + "step": 4866 + }, + { + "epoch": 1.2482524209581223, + "loss": 0.8122396469116211, + "loss_ce": 0.002181046409532428, + "loss_iou": 0.376953125, + "loss_num": 0.01104736328125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 614605824, + "step": 4866 + }, + { + "epoch": 1.2485089463220675, + "grad_norm": 31.633161544799805, + "learning_rate": 5e-06, + "loss": 0.7788, + "num_input_tokens_seen": 614731856, + "step": 4867 + }, + { + "epoch": 1.2485089463220675, + "loss": 0.6209088563919067, + "loss_ce": 0.0005474760546348989, + "loss_iou": 0.294921875, + "loss_num": 0.0059814453125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 614731856, + "step": 4867 + }, + { + "epoch": 1.248765471686013, + "grad_norm": 34.22772216796875, + "learning_rate": 5e-06, + "loss": 0.8492, + "num_input_tokens_seen": 614858756, + "step": 4868 + }, + { + "epoch": 1.248765471686013, + "loss": 0.6008121967315674, + "loss_ce": 0.0002262425987282768, + "loss_iou": 0.287109375, + "loss_num": 0.00506591796875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 614858756, + "step": 4868 + }, + { + "epoch": 1.2490219970499583, + "grad_norm": 58.88716506958008, + "learning_rate": 5e-06, + "loss": 0.9442, + "num_input_tokens_seen": 614985344, + "step": 4869 + }, + { + "epoch": 1.2490219970499583, + "loss": 1.031977653503418, + "loss_ce": 0.001215929165482521, + "loss_iou": 0.474609375, + "loss_num": 0.016357421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 614985344, + "step": 4869 + }, + { + "epoch": 1.2492785224139036, + "grad_norm": 51.10793685913086, + "learning_rate": 5e-06, + "loss": 0.8577, + "num_input_tokens_seen": 615111432, + "step": 4870 + }, + { + "epoch": 1.2492785224139036, + "loss": 0.8859601616859436, + "loss_ce": 0.00461247842758894, + "loss_iou": 0.3984375, + "loss_num": 0.016845703125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 615111432, + "step": 4870 + }, + { + "epoch": 1.249535047777849, + "grad_norm": 49.4130973815918, + "learning_rate": 5e-06, + "loss": 0.8326, + "num_input_tokens_seen": 615237824, + "step": 4871 + }, + { + "epoch": 1.249535047777849, + "loss": 0.8178707957267761, + "loss_ce": 0.0014645553892478347, + "loss_iou": 0.37890625, + "loss_num": 0.01153564453125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 615237824, + "step": 4871 + }, + { + "epoch": 1.2497915731417943, + "grad_norm": 48.41147994995117, + "learning_rate": 5e-06, + "loss": 0.9895, + "num_input_tokens_seen": 615364084, + "step": 4872 + }, + { + "epoch": 1.2497915731417943, + "loss": 1.0228662490844727, + "loss_ce": 0.00040528789395466447, + "loss_iou": 0.462890625, + "loss_num": 0.0191650390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 615364084, + "step": 4872 + }, + { + "epoch": 1.2500480985057398, + "grad_norm": 40.475486755371094, + "learning_rate": 5e-06, + "loss": 0.8392, + "num_input_tokens_seen": 615489788, + "step": 4873 + }, + { + "epoch": 1.2500480985057398, + "loss": 0.9155563116073608, + "loss_ce": 0.0014938146341592073, + "loss_iou": 0.416015625, + "loss_num": 0.0162353515625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 615489788, + "step": 4873 + }, + { + "epoch": 1.250304623869685, + "grad_norm": 58.11557388305664, + "learning_rate": 5e-06, + "loss": 0.8285, + "num_input_tokens_seen": 615616924, + "step": 4874 + }, + { + "epoch": 1.250304623869685, + "loss": 0.8934458494186401, + "loss_ce": 0.00037946016527712345, + "loss_iou": 0.419921875, + "loss_num": 0.0106201171875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 615616924, + "step": 4874 + }, + { + "epoch": 1.2505611492336306, + "grad_norm": 45.64991760253906, + "learning_rate": 5e-06, + "loss": 0.9463, + "num_input_tokens_seen": 615742712, + "step": 4875 + }, + { + "epoch": 1.2505611492336306, + "loss": 1.0043511390686035, + "loss_ce": 0.004839432891458273, + "loss_iou": 0.458984375, + "loss_num": 0.0167236328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 615742712, + "step": 4875 + }, + { + "epoch": 1.2508176745975759, + "grad_norm": 27.524810791015625, + "learning_rate": 5e-06, + "loss": 0.7656, + "num_input_tokens_seen": 615868644, + "step": 4876 + }, + { + "epoch": 1.2508176745975759, + "loss": 0.5897889137268066, + "loss_ce": 0.0004334656405262649, + "loss_iou": 0.28125, + "loss_num": 0.005279541015625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 615868644, + "step": 4876 + }, + { + "epoch": 1.2510741999615211, + "grad_norm": 29.8249568939209, + "learning_rate": 5e-06, + "loss": 0.7741, + "num_input_tokens_seen": 615995076, + "step": 4877 + }, + { + "epoch": 1.2510741999615211, + "loss": 0.8309594988822937, + "loss_ce": 0.00039308282430283725, + "loss_iou": 0.38671875, + "loss_num": 0.0115966796875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 615995076, + "step": 4877 + }, + { + "epoch": 1.2513307253254666, + "grad_norm": 51.327857971191406, + "learning_rate": 5e-06, + "loss": 0.8185, + "num_input_tokens_seen": 616121512, + "step": 4878 + }, + { + "epoch": 1.2513307253254666, + "loss": 0.7672574520111084, + "loss_ce": 0.0035855905152857304, + "loss_iou": 0.369140625, + "loss_num": 0.005157470703125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 616121512, + "step": 4878 + }, + { + "epoch": 1.251587250689412, + "grad_norm": 49.58641052246094, + "learning_rate": 5e-06, + "loss": 0.9151, + "num_input_tokens_seen": 616248280, + "step": 4879 + }, + { + "epoch": 1.251587250689412, + "loss": 0.8126684427261353, + "loss_ce": 0.002609824063256383, + "loss_iou": 0.380859375, + "loss_num": 0.0093994140625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 616248280, + "step": 4879 + }, + { + "epoch": 1.2518437760533572, + "grad_norm": 50.77007293701172, + "learning_rate": 5e-06, + "loss": 0.918, + "num_input_tokens_seen": 616373940, + "step": 4880 + }, + { + "epoch": 1.2518437760533572, + "loss": 0.8627360463142395, + "loss_ce": 0.0014079133979976177, + "loss_iou": 0.39453125, + "loss_num": 0.014404296875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 616373940, + "step": 4880 + }, + { + "epoch": 1.2521003014173027, + "grad_norm": 44.6944694519043, + "learning_rate": 5e-06, + "loss": 0.8419, + "num_input_tokens_seen": 616500028, + "step": 4881 + }, + { + "epoch": 1.2521003014173027, + "loss": 0.97746342420578, + "loss_ce": 0.001389187527820468, + "loss_iou": 0.453125, + "loss_num": 0.0142822265625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 616500028, + "step": 4881 + }, + { + "epoch": 1.252356826781248, + "grad_norm": 41.16373062133789, + "learning_rate": 5e-06, + "loss": 0.8961, + "num_input_tokens_seen": 616626664, + "step": 4882 + }, + { + "epoch": 1.252356826781248, + "loss": 0.9138579964637756, + "loss_ce": 0.0002837352512869984, + "loss_iou": 0.42578125, + "loss_num": 0.012451171875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 616626664, + "step": 4882 + }, + { + "epoch": 1.2526133521451934, + "grad_norm": 34.452972412109375, + "learning_rate": 5e-06, + "loss": 0.8164, + "num_input_tokens_seen": 616752212, + "step": 4883 + }, + { + "epoch": 1.2526133521451934, + "loss": 0.7162067890167236, + "loss_ce": 0.00038651403156109154, + "loss_iou": 0.341796875, + "loss_num": 0.0064697265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 616752212, + "step": 4883 + }, + { + "epoch": 1.2528698775091387, + "grad_norm": 32.316307067871094, + "learning_rate": 5e-06, + "loss": 0.8049, + "num_input_tokens_seen": 616879836, + "step": 4884 + }, + { + "epoch": 1.2528698775091387, + "loss": 0.6363385915756226, + "loss_ce": 0.0001081209848052822, + "loss_iou": 0.30078125, + "loss_num": 0.006683349609375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 616879836, + "step": 4884 + }, + { + "epoch": 1.2531264028730842, + "grad_norm": 30.54315757751465, + "learning_rate": 5e-06, + "loss": 0.7412, + "num_input_tokens_seen": 617004768, + "step": 4885 + }, + { + "epoch": 1.2531264028730842, + "loss": 0.7676336169242859, + "loss_ce": 5.5488242651335895e-05, + "loss_iou": 0.365234375, + "loss_num": 0.007232666015625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 617004768, + "step": 4885 + }, + { + "epoch": 1.2533829282370295, + "grad_norm": 24.224180221557617, + "learning_rate": 5e-06, + "loss": 0.8331, + "num_input_tokens_seen": 617130584, + "step": 4886 + }, + { + "epoch": 1.2533829282370295, + "loss": 0.9251647591590881, + "loss_ce": 0.0008483612909913063, + "loss_iou": 0.412109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 617130584, + "step": 4886 + }, + { + "epoch": 1.2536394536009747, + "grad_norm": 34.50944519042969, + "learning_rate": 5e-06, + "loss": 0.856, + "num_input_tokens_seen": 617257368, + "step": 4887 + }, + { + "epoch": 1.2536394536009747, + "loss": 0.6749765872955322, + "loss_ce": 0.001636710250750184, + "loss_iou": 0.318359375, + "loss_num": 0.00701904296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 617257368, + "step": 4887 + }, + { + "epoch": 1.2538959789649202, + "grad_norm": 33.48082733154297, + "learning_rate": 5e-06, + "loss": 0.759, + "num_input_tokens_seen": 617383100, + "step": 4888 + }, + { + "epoch": 1.2538959789649202, + "loss": 0.8120369911193848, + "loss_ce": 0.0010018073953688145, + "loss_iou": 0.37890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 617383100, + "step": 4888 + }, + { + "epoch": 1.2541525043288655, + "grad_norm": 26.95652198791504, + "learning_rate": 5e-06, + "loss": 0.8689, + "num_input_tokens_seen": 617509416, + "step": 4889 + }, + { + "epoch": 1.2541525043288655, + "loss": 0.9684051275253296, + "loss_ce": 0.0011809475254267454, + "loss_iou": 0.4296875, + "loss_num": 0.0216064453125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 617509416, + "step": 4889 + }, + { + "epoch": 1.2544090296928108, + "grad_norm": 38.312721252441406, + "learning_rate": 5e-06, + "loss": 0.8086, + "num_input_tokens_seen": 617634628, + "step": 4890 + }, + { + "epoch": 1.2544090296928108, + "loss": 0.9245145320892334, + "loss_ce": 0.00019810721278190613, + "loss_iou": 0.41796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 617634628, + "step": 4890 + }, + { + "epoch": 1.2546655550567563, + "grad_norm": 22.47524070739746, + "learning_rate": 5e-06, + "loss": 0.8985, + "num_input_tokens_seen": 617760612, + "step": 4891 + }, + { + "epoch": 1.2546655550567563, + "loss": 0.8494176864624023, + "loss_ce": 0.0012731136521324515, + "loss_iou": 0.3984375, + "loss_num": 0.010498046875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 617760612, + "step": 4891 + }, + { + "epoch": 1.2549220804207015, + "grad_norm": 35.46991729736328, + "learning_rate": 5e-06, + "loss": 0.8283, + "num_input_tokens_seen": 617887532, + "step": 4892 + }, + { + "epoch": 1.2549220804207015, + "loss": 0.7959955930709839, + "loss_ce": 9.717111242935061e-05, + "loss_iou": 0.373046875, + "loss_num": 0.01025390625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 617887532, + "step": 4892 + }, + { + "epoch": 1.255178605784647, + "grad_norm": 34.90095901489258, + "learning_rate": 5e-06, + "loss": 0.9707, + "num_input_tokens_seen": 618012804, + "step": 4893 + }, + { + "epoch": 1.255178605784647, + "loss": 0.906758189201355, + "loss_ce": 0.004658609628677368, + "loss_iou": 0.40625, + "loss_num": 0.0181884765625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 618012804, + "step": 4893 + }, + { + "epoch": 1.2554351311485923, + "grad_norm": 36.682708740234375, + "learning_rate": 5e-06, + "loss": 0.8237, + "num_input_tokens_seen": 618138664, + "step": 4894 + }, + { + "epoch": 1.2554351311485923, + "loss": 0.8308547735214233, + "loss_ce": 0.0012649295385926962, + "loss_iou": 0.365234375, + "loss_num": 0.01953125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 618138664, + "step": 4894 + }, + { + "epoch": 1.2556916565125378, + "grad_norm": 35.46415328979492, + "learning_rate": 5e-06, + "loss": 0.9305, + "num_input_tokens_seen": 618265152, + "step": 4895 + }, + { + "epoch": 1.2556916565125378, + "loss": 0.9202329516410828, + "loss_ce": 0.00128766184207052, + "loss_iou": 0.44140625, + "loss_num": 0.00714111328125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 618265152, + "step": 4895 + }, + { + "epoch": 1.255948181876483, + "grad_norm": 53.456947326660156, + "learning_rate": 5e-06, + "loss": 0.8827, + "num_input_tokens_seen": 618392128, + "step": 4896 + }, + { + "epoch": 1.255948181876483, + "loss": 0.7889403104782104, + "loss_ce": 0.00012191900168545544, + "loss_iou": 0.361328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 618392128, + "step": 4896 + }, + { + "epoch": 1.2562047072404283, + "grad_norm": 39.771244049072266, + "learning_rate": 5e-06, + "loss": 0.9099, + "num_input_tokens_seen": 618518224, + "step": 4897 + }, + { + "epoch": 1.2562047072404283, + "loss": 0.7981019616127014, + "loss_ce": 0.0022034896537661552, + "loss_iou": 0.37109375, + "loss_num": 0.01043701171875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 618518224, + "step": 4897 + }, + { + "epoch": 1.2564612326043738, + "grad_norm": 30.311843872070312, + "learning_rate": 5e-06, + "loss": 0.8187, + "num_input_tokens_seen": 618645148, + "step": 4898 + }, + { + "epoch": 1.2564612326043738, + "loss": 0.7063883543014526, + "loss_ce": 0.0005778180784545839, + "loss_iou": 0.330078125, + "loss_num": 0.0091552734375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 618645148, + "step": 4898 + }, + { + "epoch": 1.256717757968319, + "grad_norm": 26.57332420349121, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 618771708, + "step": 4899 + }, + { + "epoch": 1.256717757968319, + "loss": 0.9121111631393433, + "loss_ce": 0.0002459475945215672, + "loss_iou": 0.41796875, + "loss_num": 0.01513671875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 618771708, + "step": 4899 + }, + { + "epoch": 1.2569742833322644, + "grad_norm": 23.006776809692383, + "learning_rate": 5e-06, + "loss": 0.9176, + "num_input_tokens_seen": 618897688, + "step": 4900 + }, + { + "epoch": 1.2569742833322644, + "loss": 0.8881502151489258, + "loss_ce": 0.00045489604235626757, + "loss_iou": 0.3984375, + "loss_num": 0.018310546875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 618897688, + "step": 4900 + }, + { + "epoch": 1.2572308086962098, + "grad_norm": 28.260047912597656, + "learning_rate": 5e-06, + "loss": 0.9202, + "num_input_tokens_seen": 619022944, + "step": 4901 + }, + { + "epoch": 1.2572308086962098, + "loss": 1.344700574874878, + "loss_ce": 0.0053450437262654305, + "loss_iou": 0.5625, + "loss_num": 0.042724609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 619022944, + "step": 4901 + }, + { + "epoch": 1.2574873340601551, + "grad_norm": 37.50130844116211, + "learning_rate": 5e-06, + "loss": 0.7861, + "num_input_tokens_seen": 619149496, + "step": 4902 + }, + { + "epoch": 1.2574873340601551, + "loss": 0.8403104543685913, + "loss_ce": 0.00046673332690261304, + "loss_iou": 0.39453125, + "loss_num": 0.01025390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 619149496, + "step": 4902 + }, + { + "epoch": 1.2577438594241006, + "grad_norm": 52.76167678833008, + "learning_rate": 5e-06, + "loss": 0.735, + "num_input_tokens_seen": 619274856, + "step": 4903 + }, + { + "epoch": 1.2577438594241006, + "loss": 0.7348695993423462, + "loss_ce": 0.0012270710431039333, + "loss_iou": 0.3359375, + "loss_num": 0.01202392578125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 619274856, + "step": 4903 + }, + { + "epoch": 1.2580003847880459, + "grad_norm": 51.674373626708984, + "learning_rate": 5e-06, + "loss": 0.7746, + "num_input_tokens_seen": 619400636, + "step": 4904 + }, + { + "epoch": 1.2580003847880459, + "loss": 0.7751842737197876, + "loss_ce": 0.0036998712457716465, + "loss_iou": 0.36328125, + "loss_num": 0.00933837890625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 619400636, + "step": 4904 + }, + { + "epoch": 1.2582569101519914, + "grad_norm": 50.15906524658203, + "learning_rate": 5e-06, + "loss": 0.7494, + "num_input_tokens_seen": 619526148, + "step": 4905 + }, + { + "epoch": 1.2582569101519914, + "loss": 0.7882883548736572, + "loss_ce": 0.0006906596245244145, + "loss_iou": 0.376953125, + "loss_num": 0.00665283203125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 619526148, + "step": 4905 + }, + { + "epoch": 1.2585134355159366, + "grad_norm": 34.59101104736328, + "learning_rate": 5e-06, + "loss": 0.9273, + "num_input_tokens_seen": 619653012, + "step": 4906 + }, + { + "epoch": 1.2585134355159366, + "loss": 0.9462058544158936, + "loss_ce": 0.002846541814506054, + "loss_iou": 0.42578125, + "loss_num": 0.017822265625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 619653012, + "step": 4906 + }, + { + "epoch": 1.258769960879882, + "grad_norm": 37.81549072265625, + "learning_rate": 5e-06, + "loss": 0.7821, + "num_input_tokens_seen": 619779956, + "step": 4907 + }, + { + "epoch": 1.258769960879882, + "loss": 0.7698701620101929, + "loss_ce": 9.47793887462467e-05, + "loss_iou": 0.357421875, + "loss_num": 0.01129150390625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 619779956, + "step": 4907 + }, + { + "epoch": 1.2590264862438274, + "grad_norm": 28.712631225585938, + "learning_rate": 5e-06, + "loss": 0.8278, + "num_input_tokens_seen": 619905992, + "step": 4908 + }, + { + "epoch": 1.2590264862438274, + "loss": 0.7908428907394409, + "loss_ce": 0.002024509944021702, + "loss_iou": 0.359375, + "loss_num": 0.0135498046875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 619905992, + "step": 4908 + }, + { + "epoch": 1.2592830116077727, + "grad_norm": 17.206342697143555, + "learning_rate": 5e-06, + "loss": 0.7712, + "num_input_tokens_seen": 620031472, + "step": 4909 + }, + { + "epoch": 1.2592830116077727, + "loss": 0.6872033476829529, + "loss_ce": 0.0028771499637514353, + "loss_iou": 0.31640625, + "loss_num": 0.01031494140625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 620031472, + "step": 4909 + }, + { + "epoch": 1.259539536971718, + "grad_norm": 16.366432189941406, + "learning_rate": 5e-06, + "loss": 0.8707, + "num_input_tokens_seen": 620157352, + "step": 4910 + }, + { + "epoch": 1.259539536971718, + "loss": 1.0560791492462158, + "loss_ce": 0.0023681398015469313, + "loss_iou": 0.486328125, + "loss_num": 0.0167236328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 620157352, + "step": 4910 + }, + { + "epoch": 1.2597960623356634, + "grad_norm": 29.159927368164062, + "learning_rate": 5e-06, + "loss": 0.8539, + "num_input_tokens_seen": 620284752, + "step": 4911 + }, + { + "epoch": 1.2597960623356634, + "loss": 0.7015319466590881, + "loss_ce": 0.005242906045168638, + "loss_iou": 0.3359375, + "loss_num": 0.004669189453125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 620284752, + "step": 4911 + }, + { + "epoch": 1.260052587699609, + "grad_norm": 39.89482116699219, + "learning_rate": 5e-06, + "loss": 0.8178, + "num_input_tokens_seen": 620412012, + "step": 4912 + }, + { + "epoch": 1.260052587699609, + "loss": 0.7695851922035217, + "loss_ce": 0.00029806458042003214, + "loss_iou": 0.35546875, + "loss_num": 0.011962890625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 620412012, + "step": 4912 + }, + { + "epoch": 1.2603091130635542, + "grad_norm": 19.92340850830078, + "learning_rate": 5e-06, + "loss": 0.7858, + "num_input_tokens_seen": 620538628, + "step": 4913 + }, + { + "epoch": 1.2603091130635542, + "loss": 0.6973683834075928, + "loss_ce": 0.00034686760045588017, + "loss_iou": 0.31640625, + "loss_num": 0.01287841796875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 620538628, + "step": 4913 + }, + { + "epoch": 1.2605656384274995, + "grad_norm": 15.72502326965332, + "learning_rate": 5e-06, + "loss": 0.8266, + "num_input_tokens_seen": 620665896, + "step": 4914 + }, + { + "epoch": 1.2605656384274995, + "loss": 0.9105359315872192, + "loss_ce": 0.005262497812509537, + "loss_iou": 0.412109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 620665896, + "step": 4914 + }, + { + "epoch": 1.260822163791445, + "grad_norm": 22.429065704345703, + "learning_rate": 5e-06, + "loss": 0.859, + "num_input_tokens_seen": 620792040, + "step": 4915 + }, + { + "epoch": 1.260822163791445, + "loss": 0.8305152654647827, + "loss_ce": 0.0001930258295033127, + "loss_iou": 0.37890625, + "loss_num": 0.014404296875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 620792040, + "step": 4915 + }, + { + "epoch": 1.2610786891553902, + "grad_norm": 25.278873443603516, + "learning_rate": 5e-06, + "loss": 0.7277, + "num_input_tokens_seen": 620918604, + "step": 4916 + }, + { + "epoch": 1.2610786891553902, + "loss": 0.8336285948753357, + "loss_ce": 0.00013253385259304196, + "loss_iou": 0.392578125, + "loss_num": 0.010009765625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 620918604, + "step": 4916 + }, + { + "epoch": 1.2613352145193355, + "grad_norm": 34.928253173828125, + "learning_rate": 5e-06, + "loss": 0.913, + "num_input_tokens_seen": 621044528, + "step": 4917 + }, + { + "epoch": 1.2613352145193355, + "loss": 1.0888681411743164, + "loss_ce": 0.0014657413121312857, + "loss_iou": 0.5234375, + "loss_num": 0.0086669921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 621044528, + "step": 4917 + }, + { + "epoch": 1.261591739883281, + "grad_norm": 31.51630210876465, + "learning_rate": 5e-06, + "loss": 0.8183, + "num_input_tokens_seen": 621170520, + "step": 4918 + }, + { + "epoch": 1.261591739883281, + "loss": 0.6987836956977844, + "loss_ce": 0.002494653221219778, + "loss_iou": 0.330078125, + "loss_num": 0.007171630859375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 621170520, + "step": 4918 + }, + { + "epoch": 1.2618482652472263, + "grad_norm": 157.75596618652344, + "learning_rate": 5e-06, + "loss": 0.7645, + "num_input_tokens_seen": 621296792, + "step": 4919 + }, + { + "epoch": 1.2618482652472263, + "loss": 0.670246422290802, + "loss_ce": 0.000568658928386867, + "loss_iou": 0.322265625, + "loss_num": 0.005126953125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 621296792, + "step": 4919 + }, + { + "epoch": 1.2621047906111718, + "grad_norm": 55.24531936645508, + "learning_rate": 5e-06, + "loss": 0.798, + "num_input_tokens_seen": 621422940, + "step": 4920 + }, + { + "epoch": 1.2621047906111718, + "loss": 0.7767440676689148, + "loss_ce": 0.001353443250991404, + "loss_iou": 0.36328125, + "loss_num": 0.010009765625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 621422940, + "step": 4920 + }, + { + "epoch": 1.262361315975117, + "grad_norm": 48.09327697753906, + "learning_rate": 5e-06, + "loss": 0.8664, + "num_input_tokens_seen": 621550052, + "step": 4921 + }, + { + "epoch": 1.262361315975117, + "loss": 0.7667883634567261, + "loss_ce": 0.00018680444918572903, + "loss_iou": 0.369140625, + "loss_num": 0.005828857421875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 621550052, + "step": 4921 + }, + { + "epoch": 1.2626178413390625, + "grad_norm": 35.5316047668457, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 621676436, + "step": 4922 + }, + { + "epoch": 1.2626178413390625, + "loss": 0.7367488145828247, + "loss_ce": 0.0001765655179042369, + "loss_iou": 0.345703125, + "loss_num": 0.0087890625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 621676436, + "step": 4922 + }, + { + "epoch": 1.2628743667030078, + "grad_norm": 40.7636833190918, + "learning_rate": 5e-06, + "loss": 1.045, + "num_input_tokens_seen": 621803176, + "step": 4923 + }, + { + "epoch": 1.2628743667030078, + "loss": 1.0088164806365967, + "loss_ce": 0.0010039464104920626, + "loss_iou": 0.455078125, + "loss_num": 0.0198974609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 621803176, + "step": 4923 + }, + { + "epoch": 1.263130892066953, + "grad_norm": 50.91525650024414, + "learning_rate": 5e-06, + "loss": 0.9375, + "num_input_tokens_seen": 621929336, + "step": 4924 + }, + { + "epoch": 1.263130892066953, + "loss": 0.804293692111969, + "loss_ce": 0.0008268889505416155, + "loss_iou": 0.36328125, + "loss_num": 0.01556396484375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 621929336, + "step": 4924 + }, + { + "epoch": 1.2633874174308986, + "grad_norm": 56.523826599121094, + "learning_rate": 5e-06, + "loss": 0.8022, + "num_input_tokens_seen": 622055692, + "step": 4925 + }, + { + "epoch": 1.2633874174308986, + "loss": 0.8145253658294678, + "loss_ce": 0.0005605214973911643, + "loss_iou": 0.380859375, + "loss_num": 0.0103759765625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 622055692, + "step": 4925 + }, + { + "epoch": 1.2636439427948438, + "grad_norm": 54.99233627319336, + "learning_rate": 5e-06, + "loss": 0.7361, + "num_input_tokens_seen": 622182960, + "step": 4926 + }, + { + "epoch": 1.2636439427948438, + "loss": 0.8205083608627319, + "loss_ce": 0.00019586940470617265, + "loss_iou": 0.3828125, + "loss_num": 0.01123046875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 622182960, + "step": 4926 + }, + { + "epoch": 1.263900468158789, + "grad_norm": 49.662010192871094, + "learning_rate": 5e-06, + "loss": 0.9016, + "num_input_tokens_seen": 622309104, + "step": 4927 + }, + { + "epoch": 1.263900468158789, + "loss": 0.6709178686141968, + "loss_ce": 0.00026355497539043427, + "loss_iou": 0.3203125, + "loss_num": 0.00567626953125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 622309104, + "step": 4927 + }, + { + "epoch": 1.2641569935227346, + "grad_norm": 24.025911331176758, + "learning_rate": 5e-06, + "loss": 0.8672, + "num_input_tokens_seen": 622434688, + "step": 4928 + }, + { + "epoch": 1.2641569935227346, + "loss": 0.9914757609367371, + "loss_ce": 0.0012413345975801349, + "loss_iou": 0.4609375, + "loss_num": 0.013427734375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 622434688, + "step": 4928 + }, + { + "epoch": 1.2644135188866799, + "grad_norm": 35.365360260009766, + "learning_rate": 5e-06, + "loss": 0.963, + "num_input_tokens_seen": 622561684, + "step": 4929 + }, + { + "epoch": 1.2644135188866799, + "loss": 0.9157590866088867, + "loss_ce": 0.00023174025409389287, + "loss_iou": 0.431640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 622561684, + "step": 4929 + }, + { + "epoch": 1.2646700442506253, + "grad_norm": 37.33714294433594, + "learning_rate": 5e-06, + "loss": 0.7778, + "num_input_tokens_seen": 622688044, + "step": 4930 + }, + { + "epoch": 1.2646700442506253, + "loss": 0.8068856000900269, + "loss_ce": 0.0012215173337608576, + "loss_iou": 0.376953125, + "loss_num": 0.01019287109375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 622688044, + "step": 4930 + }, + { + "epoch": 1.2649265696145706, + "grad_norm": 35.873291015625, + "learning_rate": 5e-06, + "loss": 0.8641, + "num_input_tokens_seen": 622813216, + "step": 4931 + }, + { + "epoch": 1.2649265696145706, + "loss": 0.9125863313674927, + "loss_ce": 0.0014535472728312016, + "loss_iou": 0.423828125, + "loss_num": 0.0128173828125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 622813216, + "step": 4931 + }, + { + "epoch": 1.265183094978516, + "grad_norm": 41.349002838134766, + "learning_rate": 5e-06, + "loss": 0.8894, + "num_input_tokens_seen": 622939912, + "step": 4932 + }, + { + "epoch": 1.265183094978516, + "loss": 0.9215930700302124, + "loss_ce": 0.00020635877444874495, + "loss_iou": 0.423828125, + "loss_num": 0.014404296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 622939912, + "step": 4932 + }, + { + "epoch": 1.2654396203424614, + "grad_norm": 40.74809646606445, + "learning_rate": 5e-06, + "loss": 0.8246, + "num_input_tokens_seen": 623065612, + "step": 4933 + }, + { + "epoch": 1.2654396203424614, + "loss": 0.76250821352005, + "loss_ce": 0.0007894561276771128, + "loss_iou": 0.34375, + "loss_num": 0.01495361328125, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 623065612, + "step": 4933 + }, + { + "epoch": 1.2656961457064067, + "grad_norm": 40.19126892089844, + "learning_rate": 5e-06, + "loss": 0.8494, + "num_input_tokens_seen": 623191528, + "step": 4934 + }, + { + "epoch": 1.2656961457064067, + "loss": 0.9265611171722412, + "loss_ce": 0.0007798410952091217, + "loss_iou": 0.431640625, + "loss_num": 0.0126953125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 623191528, + "step": 4934 + }, + { + "epoch": 1.2659526710703521, + "grad_norm": 53.57729721069336, + "learning_rate": 5e-06, + "loss": 0.9314, + "num_input_tokens_seen": 623319560, + "step": 4935 + }, + { + "epoch": 1.2659526710703521, + "loss": 0.8819797039031982, + "loss_ce": 0.0006321133696474135, + "loss_iou": 0.408203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 623319560, + "step": 4935 + }, + { + "epoch": 1.2662091964342974, + "grad_norm": 58.01642608642578, + "learning_rate": 5e-06, + "loss": 0.9029, + "num_input_tokens_seen": 623445816, + "step": 4936 + }, + { + "epoch": 1.2662091964342974, + "loss": 0.806560754776001, + "loss_ce": 0.0013849661918357015, + "loss_iou": 0.37890625, + "loss_num": 0.0093994140625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 623445816, + "step": 4936 + }, + { + "epoch": 1.2664657217982427, + "grad_norm": 46.59574508666992, + "learning_rate": 5e-06, + "loss": 0.871, + "num_input_tokens_seen": 623571596, + "step": 4937 + }, + { + "epoch": 1.2664657217982427, + "loss": 0.8737508058547974, + "loss_ce": 0.001192231778986752, + "loss_iou": 0.41015625, + "loss_num": 0.01080322265625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 623571596, + "step": 4937 + }, + { + "epoch": 1.2667222471621882, + "grad_norm": 44.851593017578125, + "learning_rate": 5e-06, + "loss": 0.8283, + "num_input_tokens_seen": 623698072, + "step": 4938 + }, + { + "epoch": 1.2667222471621882, + "loss": 0.7850120067596436, + "loss_ce": 0.0008323309011757374, + "loss_iou": 0.359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 623698072, + "step": 4938 + }, + { + "epoch": 1.2669787725261334, + "grad_norm": 46.51664733886719, + "learning_rate": 5e-06, + "loss": 0.9242, + "num_input_tokens_seen": 623824560, + "step": 4939 + }, + { + "epoch": 1.2669787725261334, + "loss": 1.0501995086669922, + "loss_ce": 0.0008830720908008516, + "loss_iou": 0.47265625, + "loss_num": 0.0206298828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 623824560, + "step": 4939 + }, + { + "epoch": 1.267235297890079, + "grad_norm": 50.585899353027344, + "learning_rate": 5e-06, + "loss": 0.9647, + "num_input_tokens_seen": 623951788, + "step": 4940 + }, + { + "epoch": 1.267235297890079, + "loss": 1.1142146587371826, + "loss_ce": 0.005083835683763027, + "loss_iou": 0.486328125, + "loss_num": 0.02734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 623951788, + "step": 4940 + }, + { + "epoch": 1.2674918232540242, + "grad_norm": 79.7021484375, + "learning_rate": 5e-06, + "loss": 0.8036, + "num_input_tokens_seen": 624078420, + "step": 4941 + }, + { + "epoch": 1.2674918232540242, + "loss": 0.7939039468765259, + "loss_ce": 0.00044689897913485765, + "loss_iou": 0.373046875, + "loss_num": 0.00958251953125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 624078420, + "step": 4941 + }, + { + "epoch": 1.2677483486179697, + "grad_norm": 38.69801330566406, + "learning_rate": 5e-06, + "loss": 0.7837, + "num_input_tokens_seen": 624204424, + "step": 4942 + }, + { + "epoch": 1.2677483486179697, + "loss": 0.7445943355560303, + "loss_ce": 0.0002095317468047142, + "loss_iou": 0.341796875, + "loss_num": 0.011962890625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 624204424, + "step": 4942 + }, + { + "epoch": 1.268004873981915, + "grad_norm": 31.011985778808594, + "learning_rate": 5e-06, + "loss": 0.8156, + "num_input_tokens_seen": 624330612, + "step": 4943 + }, + { + "epoch": 1.268004873981915, + "loss": 0.9222507476806641, + "loss_ce": 0.0013523295056074858, + "loss_iou": 0.435546875, + "loss_num": 0.01025390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 624330612, + "step": 4943 + }, + { + "epoch": 1.2682613993458602, + "grad_norm": 37.95759963989258, + "learning_rate": 5e-06, + "loss": 0.69, + "num_input_tokens_seen": 624455732, + "step": 4944 + }, + { + "epoch": 1.2682613993458602, + "loss": 0.6654244661331177, + "loss_ce": 0.0011177989654242992, + "loss_iou": 0.314453125, + "loss_num": 0.0068359375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 624455732, + "step": 4944 + }, + { + "epoch": 1.2685179247098057, + "grad_norm": 41.530845642089844, + "learning_rate": 5e-06, + "loss": 0.8424, + "num_input_tokens_seen": 624581880, + "step": 4945 + }, + { + "epoch": 1.2685179247098057, + "loss": 0.7376614809036255, + "loss_ce": 0.0006009698263369501, + "loss_iou": 0.34765625, + "loss_num": 0.0081787109375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 624581880, + "step": 4945 + }, + { + "epoch": 1.268774450073751, + "grad_norm": 29.9755916595459, + "learning_rate": 5e-06, + "loss": 0.8629, + "num_input_tokens_seen": 624707216, + "step": 4946 + }, + { + "epoch": 1.268774450073751, + "loss": 0.88881516456604, + "loss_ce": 0.005026082042604685, + "loss_iou": 0.408203125, + "loss_num": 0.0133056640625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 624707216, + "step": 4946 + }, + { + "epoch": 1.2690309754376963, + "grad_norm": 24.138750076293945, + "learning_rate": 5e-06, + "loss": 0.7921, + "num_input_tokens_seen": 624833164, + "step": 4947 + }, + { + "epoch": 1.2690309754376963, + "loss": 0.7643711566925049, + "loss_ce": 0.0004551385063678026, + "loss_iou": 0.361328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 624833164, + "step": 4947 + }, + { + "epoch": 1.2692875008016418, + "grad_norm": 35.21095657348633, + "learning_rate": 5e-06, + "loss": 0.8659, + "num_input_tokens_seen": 624959928, + "step": 4948 + }, + { + "epoch": 1.2692875008016418, + "loss": 0.8944958448410034, + "loss_ce": 0.00045292655704542994, + "loss_iou": 0.427734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 624959928, + "step": 4948 + }, + { + "epoch": 1.2695440261655873, + "grad_norm": 26.840309143066406, + "learning_rate": 5e-06, + "loss": 0.8041, + "num_input_tokens_seen": 625086680, + "step": 4949 + }, + { + "epoch": 1.2695440261655873, + "loss": 0.8396748304367065, + "loss_ce": 0.00031930243130773306, + "loss_iou": 0.39453125, + "loss_num": 0.01025390625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 625086680, + "step": 4949 + }, + { + "epoch": 1.2698005515295325, + "grad_norm": 18.462574005126953, + "learning_rate": 5e-06, + "loss": 0.8576, + "num_input_tokens_seen": 625212184, + "step": 4950 + }, + { + "epoch": 1.2698005515295325, + "loss": 0.9299997687339783, + "loss_ce": 0.0003121958579868078, + "loss_iou": 0.423828125, + "loss_num": 0.0167236328125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 625212184, + "step": 4950 + }, + { + "epoch": 1.2700570768934778, + "grad_norm": 20.300743103027344, + "learning_rate": 5e-06, + "loss": 0.9722, + "num_input_tokens_seen": 625337600, + "step": 4951 + }, + { + "epoch": 1.2700570768934778, + "loss": 0.8279280662536621, + "loss_ce": 0.0012678703060373664, + "loss_iou": 0.390625, + "loss_num": 0.008544921875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 625337600, + "step": 4951 + }, + { + "epoch": 1.2703136022574233, + "grad_norm": 25.287561416625977, + "learning_rate": 5e-06, + "loss": 0.8281, + "num_input_tokens_seen": 625463356, + "step": 4952 + }, + { + "epoch": 1.2703136022574233, + "loss": 0.7431397438049316, + "loss_ce": 0.0007080832729116082, + "loss_iou": 0.3515625, + "loss_num": 0.007720947265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 625463356, + "step": 4952 + }, + { + "epoch": 1.2705701276213686, + "grad_norm": 41.39350891113281, + "learning_rate": 5e-06, + "loss": 0.7834, + "num_input_tokens_seen": 625589856, + "step": 4953 + }, + { + "epoch": 1.2705701276213686, + "loss": 0.7960860133171082, + "loss_ce": 0.003605557605624199, + "loss_iou": 0.3671875, + "loss_num": 0.01177978515625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 625589856, + "step": 4953 + }, + { + "epoch": 1.2708266529853138, + "grad_norm": 47.46560287475586, + "learning_rate": 5e-06, + "loss": 0.7745, + "num_input_tokens_seen": 625716228, + "step": 4954 + }, + { + "epoch": 1.2708266529853138, + "loss": 0.8450028300285339, + "loss_ce": 0.00222941511310637, + "loss_iou": 0.37890625, + "loss_num": 0.01708984375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 625716228, + "step": 4954 + }, + { + "epoch": 1.2710831783492593, + "grad_norm": 53.21396255493164, + "learning_rate": 5e-06, + "loss": 0.8679, + "num_input_tokens_seen": 625843256, + "step": 4955 + }, + { + "epoch": 1.2710831783492593, + "loss": 0.8839325308799744, + "loss_ce": 0.0006317896768450737, + "loss_iou": 0.412109375, + "loss_num": 0.01202392578125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 625843256, + "step": 4955 + }, + { + "epoch": 1.2713397037132046, + "grad_norm": 51.1306266784668, + "learning_rate": 5e-06, + "loss": 0.8679, + "num_input_tokens_seen": 625970164, + "step": 4956 + }, + { + "epoch": 1.2713397037132046, + "loss": 0.7650450468063354, + "loss_ce": 0.0013731769286096096, + "loss_iou": 0.3671875, + "loss_num": 0.005645751953125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 625970164, + "step": 4956 + }, + { + "epoch": 1.27159622907715, + "grad_norm": 28.96293067932129, + "learning_rate": 5e-06, + "loss": 0.8909, + "num_input_tokens_seen": 626095448, + "step": 4957 + }, + { + "epoch": 1.27159622907715, + "loss": 1.003610610961914, + "loss_ce": 0.0006808904581703246, + "loss_iou": 0.4375, + "loss_num": 0.02587890625, + "loss_xval": 1.0, + "num_input_tokens_seen": 626095448, + "step": 4957 + }, + { + "epoch": 1.2718527544410954, + "grad_norm": 26.520689010620117, + "learning_rate": 5e-06, + "loss": 0.8496, + "num_input_tokens_seen": 626220720, + "step": 4958 + }, + { + "epoch": 1.2718527544410954, + "loss": 0.9125292301177979, + "loss_ce": 0.0004198401584289968, + "loss_iou": 0.416015625, + "loss_num": 0.015869140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 626220720, + "step": 4958 + }, + { + "epoch": 1.2721092798050408, + "grad_norm": 21.581411361694336, + "learning_rate": 5e-06, + "loss": 0.7524, + "num_input_tokens_seen": 626345904, + "step": 4959 + }, + { + "epoch": 1.2721092798050408, + "loss": 0.7054440379142761, + "loss_ce": 0.00036592117976397276, + "loss_iou": 0.333984375, + "loss_num": 0.007415771484375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 626345904, + "step": 4959 + }, + { + "epoch": 1.2723658051689861, + "grad_norm": 19.068069458007812, + "learning_rate": 5e-06, + "loss": 0.8101, + "num_input_tokens_seen": 626471796, + "step": 4960 + }, + { + "epoch": 1.2723658051689861, + "loss": 0.8325120210647583, + "loss_ce": 0.0009690984152257442, + "loss_iou": 0.390625, + "loss_num": 0.00970458984375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 626471796, + "step": 4960 + }, + { + "epoch": 1.2726223305329314, + "grad_norm": 23.849227905273438, + "learning_rate": 5e-06, + "loss": 0.8745, + "num_input_tokens_seen": 626597964, + "step": 4961 + }, + { + "epoch": 1.2726223305329314, + "loss": 0.8869057297706604, + "loss_ce": 0.0014076650841161609, + "loss_iou": 0.38671875, + "loss_num": 0.0220947265625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 626597964, + "step": 4961 + }, + { + "epoch": 1.2728788558968769, + "grad_norm": 28.85529899597168, + "learning_rate": 5e-06, + "loss": 0.8788, + "num_input_tokens_seen": 626724416, + "step": 4962 + }, + { + "epoch": 1.2728788558968769, + "loss": 0.9061418771743774, + "loss_ce": 0.0018450523493811488, + "loss_iou": 0.412109375, + "loss_num": 0.0159912109375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 626724416, + "step": 4962 + }, + { + "epoch": 1.2731353812608222, + "grad_norm": 27.655677795410156, + "learning_rate": 5e-06, + "loss": 0.8675, + "num_input_tokens_seen": 626850016, + "step": 4963 + }, + { + "epoch": 1.2731353812608222, + "loss": 0.7236914038658142, + "loss_ce": 0.000302726257359609, + "loss_iou": 0.34765625, + "loss_num": 0.005950927734375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 626850016, + "step": 4963 + }, + { + "epoch": 1.2733919066247674, + "grad_norm": 49.836021423339844, + "learning_rate": 5e-06, + "loss": 0.8935, + "num_input_tokens_seen": 626976784, + "step": 4964 + }, + { + "epoch": 1.2733919066247674, + "loss": 0.8418887853622437, + "loss_ce": 0.00106848927680403, + "loss_iou": 0.390625, + "loss_num": 0.01226806640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 626976784, + "step": 4964 + }, + { + "epoch": 1.273648431988713, + "grad_norm": 37.52360153198242, + "learning_rate": 5e-06, + "loss": 0.8776, + "num_input_tokens_seen": 627102184, + "step": 4965 + }, + { + "epoch": 1.273648431988713, + "loss": 0.9744186401367188, + "loss_ce": 0.0042037819512188435, + "loss_iou": 0.4296875, + "loss_num": 0.02197265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 627102184, + "step": 4965 + }, + { + "epoch": 1.2739049573526582, + "grad_norm": 38.31480407714844, + "learning_rate": 5e-06, + "loss": 0.867, + "num_input_tokens_seen": 627227396, + "step": 4966 + }, + { + "epoch": 1.2739049573526582, + "loss": 0.6506098508834839, + "loss_ce": 0.0009516425197944045, + "loss_iou": 0.3046875, + "loss_num": 0.008056640625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 627227396, + "step": 4966 + }, + { + "epoch": 1.2741614827166037, + "grad_norm": 38.97500991821289, + "learning_rate": 5e-06, + "loss": 0.848, + "num_input_tokens_seen": 627354844, + "step": 4967 + }, + { + "epoch": 1.2741614827166037, + "loss": 0.950712263584137, + "loss_ce": 0.0014934941427782178, + "loss_iou": 0.4375, + "loss_num": 0.01446533203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 627354844, + "step": 4967 + }, + { + "epoch": 1.274418008080549, + "grad_norm": 30.295082092285156, + "learning_rate": 5e-06, + "loss": 0.8822, + "num_input_tokens_seen": 627481164, + "step": 4968 + }, + { + "epoch": 1.274418008080549, + "loss": 0.9232335686683655, + "loss_ce": 0.00013787506031803787, + "loss_iou": 0.427734375, + "loss_num": 0.01312255859375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 627481164, + "step": 4968 + }, + { + "epoch": 1.2746745334444944, + "grad_norm": 66.21936798095703, + "learning_rate": 5e-06, + "loss": 0.8313, + "num_input_tokens_seen": 627606716, + "step": 4969 + }, + { + "epoch": 1.2746745334444944, + "loss": 0.675948977470398, + "loss_ce": 0.00016771905939094722, + "loss_iou": 0.31640625, + "loss_num": 0.00885009765625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 627606716, + "step": 4969 + }, + { + "epoch": 1.2749310588084397, + "grad_norm": 23.66092300415039, + "learning_rate": 5e-06, + "loss": 0.7069, + "num_input_tokens_seen": 627732588, + "step": 4970 + }, + { + "epoch": 1.2749310588084397, + "loss": 0.603900671005249, + "loss_ce": 0.0003850195789709687, + "loss_iou": 0.2890625, + "loss_num": 0.004852294921875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 627732588, + "step": 4970 + }, + { + "epoch": 1.275187584172385, + "grad_norm": 29.841333389282227, + "learning_rate": 5e-06, + "loss": 0.8422, + "num_input_tokens_seen": 627858900, + "step": 4971 + }, + { + "epoch": 1.275187584172385, + "loss": 0.9708854556083679, + "loss_ce": 0.0006706430576741695, + "loss_iou": 0.453125, + "loss_num": 0.01251220703125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 627858900, + "step": 4971 + }, + { + "epoch": 1.2754441095363305, + "grad_norm": 39.94166946411133, + "learning_rate": 5e-06, + "loss": 0.8055, + "num_input_tokens_seen": 627985172, + "step": 4972 + }, + { + "epoch": 1.2754441095363305, + "loss": 0.7928186058998108, + "loss_ce": 0.0003381170972716063, + "loss_iou": 0.373046875, + "loss_num": 0.00909423828125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 627985172, + "step": 4972 + }, + { + "epoch": 1.2757006349002757, + "grad_norm": 45.53670120239258, + "learning_rate": 5e-06, + "loss": 0.8506, + "num_input_tokens_seen": 628112552, + "step": 4973 + }, + { + "epoch": 1.2757006349002757, + "loss": 0.7908444404602051, + "loss_ce": 0.0003170930431224406, + "loss_iou": 0.375, + "loss_num": 0.0084228515625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 628112552, + "step": 4973 + }, + { + "epoch": 1.275957160264221, + "grad_norm": 46.26814270019531, + "learning_rate": 5e-06, + "loss": 0.8911, + "num_input_tokens_seen": 628238308, + "step": 4974 + }, + { + "epoch": 1.275957160264221, + "loss": 1.0110883712768555, + "loss_ce": 0.0008343904628418386, + "loss_iou": 0.44921875, + "loss_num": 0.0228271484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 628238308, + "step": 4974 + }, + { + "epoch": 1.2762136856281665, + "grad_norm": 48.96000289916992, + "learning_rate": 5e-06, + "loss": 0.8559, + "num_input_tokens_seen": 628365000, + "step": 4975 + }, + { + "epoch": 1.2762136856281665, + "loss": 0.8261281251907349, + "loss_ce": 0.0014210953377187252, + "loss_iou": 0.3828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 628365000, + "step": 4975 + }, + { + "epoch": 1.2764702109921118, + "grad_norm": 60.89643096923828, + "learning_rate": 5e-06, + "loss": 0.8397, + "num_input_tokens_seen": 628492120, + "step": 4976 + }, + { + "epoch": 1.2764702109921118, + "loss": 0.7982717752456665, + "loss_ce": 0.0026174297090619802, + "loss_iou": 0.369140625, + "loss_num": 0.01177978515625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 628492120, + "step": 4976 + }, + { + "epoch": 1.2767267363560573, + "grad_norm": 40.62421798706055, + "learning_rate": 5e-06, + "loss": 0.9689, + "num_input_tokens_seen": 628616892, + "step": 4977 + }, + { + "epoch": 1.2767267363560573, + "loss": 0.73210608959198, + "loss_ce": 0.00017244095215573907, + "loss_iou": 0.353515625, + "loss_num": 0.00531005859375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 628616892, + "step": 4977 + }, + { + "epoch": 1.2769832617200025, + "grad_norm": 21.516124725341797, + "learning_rate": 5e-06, + "loss": 0.8095, + "num_input_tokens_seen": 628743960, + "step": 4978 + }, + { + "epoch": 1.2769832617200025, + "loss": 0.7861276865005493, + "loss_ce": 0.00048313968000002205, + "loss_iou": 0.376953125, + "loss_num": 0.006500244140625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 628743960, + "step": 4978 + }, + { + "epoch": 1.277239787083948, + "grad_norm": 31.422386169433594, + "learning_rate": 5e-06, + "loss": 0.7465, + "num_input_tokens_seen": 628869816, + "step": 4979 + }, + { + "epoch": 1.277239787083948, + "loss": 0.7155649662017822, + "loss_ce": 0.00023296131985262036, + "loss_iou": 0.32421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 628869816, + "step": 4979 + }, + { + "epoch": 1.2774963124478933, + "grad_norm": 28.591585159301758, + "learning_rate": 5e-06, + "loss": 0.9301, + "num_input_tokens_seen": 628996352, + "step": 4980 + }, + { + "epoch": 1.2774963124478933, + "loss": 0.6916320323944092, + "loss_ce": 0.0009581916383467615, + "loss_iou": 0.326171875, + "loss_num": 0.00726318359375, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 628996352, + "step": 4980 + }, + { + "epoch": 1.2777528378118386, + "grad_norm": 40.063011169433594, + "learning_rate": 5e-06, + "loss": 0.7308, + "num_input_tokens_seen": 629122636, + "step": 4981 + }, + { + "epoch": 1.2777528378118386, + "loss": 0.8101275563240051, + "loss_ce": 0.00031312997452914715, + "loss_iou": 0.369140625, + "loss_num": 0.01446533203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 629122636, + "step": 4981 + }, + { + "epoch": 1.278009363175784, + "grad_norm": 51.96260452270508, + "learning_rate": 5e-06, + "loss": 0.9056, + "num_input_tokens_seen": 629249220, + "step": 4982 + }, + { + "epoch": 1.278009363175784, + "loss": 0.9144231677055359, + "loss_ce": 0.0008489244501106441, + "loss_iou": 0.4296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 629249220, + "step": 4982 + }, + { + "epoch": 1.2782658885397293, + "grad_norm": 44.006248474121094, + "learning_rate": 5e-06, + "loss": 0.8209, + "num_input_tokens_seen": 629376164, + "step": 4983 + }, + { + "epoch": 1.2782658885397293, + "loss": 0.8048712015151978, + "loss_ce": 0.0001836443698266521, + "loss_iou": 0.369140625, + "loss_num": 0.01336669921875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 629376164, + "step": 4983 + }, + { + "epoch": 1.2785224139036746, + "grad_norm": 40.50237274169922, + "learning_rate": 5e-06, + "loss": 0.8351, + "num_input_tokens_seen": 629502640, + "step": 4984 + }, + { + "epoch": 1.2785224139036746, + "loss": 0.850642204284668, + "loss_ce": 0.003962525632232428, + "loss_iou": 0.388671875, + "loss_num": 0.01373291015625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 629502640, + "step": 4984 + }, + { + "epoch": 1.27877893926762, + "grad_norm": 44.773624420166016, + "learning_rate": 5e-06, + "loss": 0.7993, + "num_input_tokens_seen": 629629360, + "step": 4985 + }, + { + "epoch": 1.27877893926762, + "loss": 0.569650411605835, + "loss_ce": 7.034641748759896e-05, + "loss_iou": 0.27734375, + "loss_num": 0.003265380859375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 629629360, + "step": 4985 + }, + { + "epoch": 1.2790354646315654, + "grad_norm": 47.31255340576172, + "learning_rate": 5e-06, + "loss": 0.8287, + "num_input_tokens_seen": 629755768, + "step": 4986 + }, + { + "epoch": 1.2790354646315654, + "loss": 0.8933614492416382, + "loss_ce": 0.0007833242998458445, + "loss_iou": 0.41796875, + "loss_num": 0.0118408203125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 629755768, + "step": 4986 + }, + { + "epoch": 1.2792919899955109, + "grad_norm": 43.70411682128906, + "learning_rate": 5e-06, + "loss": 0.8222, + "num_input_tokens_seen": 629882588, + "step": 4987 + }, + { + "epoch": 1.2792919899955109, + "loss": 0.7809789180755615, + "loss_ce": 0.00021719773940276355, + "loss_iou": 0.375, + "loss_num": 0.006317138671875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 629882588, + "step": 4987 + }, + { + "epoch": 1.2795485153594561, + "grad_norm": 40.42557144165039, + "learning_rate": 5e-06, + "loss": 0.9961, + "num_input_tokens_seen": 630009268, + "step": 4988 + }, + { + "epoch": 1.2795485153594561, + "loss": 0.9921629428863525, + "loss_ce": 0.0033934309612959623, + "loss_iou": 0.439453125, + "loss_num": 0.021728515625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 630009268, + "step": 4988 + }, + { + "epoch": 1.2798050407234016, + "grad_norm": 45.084205627441406, + "learning_rate": 5e-06, + "loss": 0.8093, + "num_input_tokens_seen": 630135500, + "step": 4989 + }, + { + "epoch": 1.2798050407234016, + "loss": 0.8810088634490967, + "loss_ce": 0.0016143523389473557, + "loss_iou": 0.40234375, + "loss_num": 0.01513671875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 630135500, + "step": 4989 + }, + { + "epoch": 1.280061566087347, + "grad_norm": 44.27177810668945, + "learning_rate": 5e-06, + "loss": 0.9053, + "num_input_tokens_seen": 630262248, + "step": 4990 + }, + { + "epoch": 1.280061566087347, + "loss": 1.0000500679016113, + "loss_ce": 0.0024915302637964487, + "loss_iou": 0.44921875, + "loss_num": 0.020263671875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 630262248, + "step": 4990 + }, + { + "epoch": 1.2803180914512922, + "grad_norm": 38.787811279296875, + "learning_rate": 5e-06, + "loss": 0.8118, + "num_input_tokens_seen": 630388296, + "step": 4991 + }, + { + "epoch": 1.2803180914512922, + "loss": 0.8325455784797668, + "loss_ce": 0.0007585249841213226, + "loss_iou": 0.39453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 630388296, + "step": 4991 + }, + { + "epoch": 1.2805746168152377, + "grad_norm": 46.91154479980469, + "learning_rate": 5e-06, + "loss": 0.8216, + "num_input_tokens_seen": 630515460, + "step": 4992 + }, + { + "epoch": 1.2805746168152377, + "loss": 0.9004737734794617, + "loss_ce": 0.0025245645083487034, + "loss_iou": 0.416015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 630515460, + "step": 4992 + }, + { + "epoch": 1.280831142179183, + "grad_norm": 51.57321548461914, + "learning_rate": 5e-06, + "loss": 0.8726, + "num_input_tokens_seen": 630641836, + "step": 4993 + }, + { + "epoch": 1.280831142179183, + "loss": 0.7379635572433472, + "loss_ce": 0.0009030087385326624, + "loss_iou": 0.3515625, + "loss_num": 0.006744384765625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 630641836, + "step": 4993 + }, + { + "epoch": 1.2810876675431282, + "grad_norm": 44.38196563720703, + "learning_rate": 5e-06, + "loss": 0.8297, + "num_input_tokens_seen": 630768076, + "step": 4994 + }, + { + "epoch": 1.2810876675431282, + "loss": 0.8667726516723633, + "loss_ce": 0.0003176179598085582, + "loss_iou": 0.392578125, + "loss_num": 0.01611328125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 630768076, + "step": 4994 + }, + { + "epoch": 1.2813441929070737, + "grad_norm": 46.12018966674805, + "learning_rate": 5e-06, + "loss": 0.8918, + "num_input_tokens_seen": 630894348, + "step": 4995 + }, + { + "epoch": 1.2813441929070737, + "loss": 1.073002815246582, + "loss_ce": 0.0002488900790922344, + "loss_iou": 0.478515625, + "loss_num": 0.0233154296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 630894348, + "step": 4995 + }, + { + "epoch": 1.2816007182710192, + "grad_norm": 42.912872314453125, + "learning_rate": 5e-06, + "loss": 0.926, + "num_input_tokens_seen": 631021768, + "step": 4996 + }, + { + "epoch": 1.2816007182710192, + "loss": 0.9710201621055603, + "loss_ce": 0.006786763668060303, + "loss_iou": 0.42578125, + "loss_num": 0.0228271484375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 631021768, + "step": 4996 + }, + { + "epoch": 1.2818572436349644, + "grad_norm": 35.87915802001953, + "learning_rate": 5e-06, + "loss": 0.8558, + "num_input_tokens_seen": 631149996, + "step": 4997 + }, + { + "epoch": 1.2818572436349644, + "loss": 0.8437521457672119, + "loss_ce": 0.00244354922324419, + "loss_iou": 0.390625, + "loss_num": 0.01171875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 631149996, + "step": 4997 + }, + { + "epoch": 1.2821137689989097, + "grad_norm": 38.33866882324219, + "learning_rate": 5e-06, + "loss": 0.8816, + "num_input_tokens_seen": 631276648, + "step": 4998 + }, + { + "epoch": 1.2821137689989097, + "loss": 0.8231503367424011, + "loss_ce": 0.00039640642353333533, + "loss_iou": 0.37890625, + "loss_num": 0.01312255859375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 631276648, + "step": 4998 + }, + { + "epoch": 1.2823702943628552, + "grad_norm": 55.47458267211914, + "learning_rate": 5e-06, + "loss": 0.718, + "num_input_tokens_seen": 631403072, + "step": 4999 + }, + { + "epoch": 1.2823702943628552, + "loss": 0.8435832262039185, + "loss_ce": 0.00032148510217666626, + "loss_iou": 0.390625, + "loss_num": 0.01220703125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 631403072, + "step": 4999 + }, + { + "epoch": 1.2826268197268005, + "grad_norm": 51.20155334472656, + "learning_rate": 5e-06, + "loss": 0.75, + "num_input_tokens_seen": 631529428, + "step": 5000 + }, + { + "epoch": 1.2826268197268005, + "eval_icons_CIoU": 0.3532525897026062, + "eval_icons_GIoU": 0.3214935064315796, + "eval_icons_IoU": 0.5021793246269226, + "eval_icons_MAE_all": 0.020972085185348988, + "eval_icons_MAE_h": 0.018313856329768896, + "eval_icons_MAE_w": 0.04985995963215828, + "eval_icons_MAE_x_boxes": 0.0496402382850647, + "eval_icons_MAE_y_boxes": 0.01833692565560341, + "eval_icons_NUM_probability": 0.9998745024204254, + "eval_icons_inside_bbox": 0.7395833432674408, + "eval_icons_loss": 1.447240948677063, + "eval_icons_loss_ce": 4.5746584873995744e-05, + "eval_icons_loss_iou": 0.6649169921875, + "eval_icons_loss_num": 0.023511886596679688, + "eval_icons_loss_xval": 1.447021484375, + "eval_icons_runtime": 44.7863, + "eval_icons_samples_per_second": 1.116, + "eval_icons_steps_per_second": 0.045, + "num_input_tokens_seen": 631529428, + "step": 5000 + }, + { + "epoch": 1.2826268197268005, + "eval_screenspot_CIoU": 0.12930236756801605, + "eval_screenspot_GIoU": 0.10628310590982437, + "eval_screenspot_IoU": 0.29051242272059125, + "eval_screenspot_MAE_all": 0.07955996443827947, + "eval_screenspot_MAE_h": 0.07046306878328323, + "eval_screenspot_MAE_w": 0.1288147196173668, + "eval_screenspot_MAE_x_boxes": 0.10878805816173553, + "eval_screenspot_MAE_y_boxes": 0.05706369007627169, + "eval_screenspot_NUM_probability": 0.9999549587567648, + "eval_screenspot_inside_bbox": 0.5883333285649618, + "eval_screenspot_loss": 2.2319114208221436, + "eval_screenspot_loss_ce": 0.003960580254594485, + "eval_screenspot_loss_iou": 0.9200032552083334, + "eval_screenspot_loss_num": 0.08453114827473958, + "eval_screenspot_loss_xval": 2.2625325520833335, + "eval_screenspot_runtime": 68.8048, + "eval_screenspot_samples_per_second": 1.294, + "eval_screenspot_steps_per_second": 0.044, + "num_input_tokens_seen": 631529428, + "step": 5000 + } + ], + "logging_steps": 1.0, + "max_steps": 7796, + "num_input_tokens_seen": 631529428, + "num_train_epochs": 2, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9379944633358877e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}