| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.923076923076923, | |
| "eval_steps": 500, | |
| "global_step": 39, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 429.5760192871094, | |
| "learning_rate": 5e-06, | |
| "loss": 1.9642, | |
| "memory/device_mem_reserved(gib)": 51.52, | |
| "memory/max_mem_active(gib)": 46.79, | |
| "memory/max_mem_allocated(gib)": 45.85, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 88.243408203125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.43, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 188.16270446777344, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 1.1989, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 238.33164978027344, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4779, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 1.3076923076923077, | |
| "grad_norm": 60.53327178955078, | |
| "learning_rate": 1.9953596287703015e-05, | |
| "loss": 1.4517, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 89.0710678100586, | |
| "learning_rate": 1.9905213270142184e-05, | |
| "loss": 1.1788, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 74.55708312988281, | |
| "learning_rate": 1.9854721549636805e-05, | |
| "loss": 1.3326, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 46.6354866027832, | |
| "learning_rate": 1.9801980198019806e-05, | |
| "loss": 1.1162, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 11.567599296569824, | |
| "learning_rate": 1.974683544303798e-05, | |
| "loss": 1.0117, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 15.884381294250488, | |
| "learning_rate": 1.9689119170984456e-05, | |
| "loss": 0.9432, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 8.479954719543457, | |
| "learning_rate": 1.9628647214854114e-05, | |
| "loss": 0.9247, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 17.43779754638672, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 1.0189, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 3.3076923076923075, | |
| "grad_norm": 8.543362617492676, | |
| "learning_rate": 1.9498607242339832e-05, | |
| "loss": 0.9282, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 3.6153846153846154, | |
| "grad_norm": 5.215980052947998, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 0.8694, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 3.9230769230769234, | |
| "grad_norm": 4.073164463043213, | |
| "learning_rate": 1.935483870967742e-05, | |
| "loss": 0.8029, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 5.382778167724609, | |
| "learning_rate": 1.9277108433734944e-05, | |
| "loss": 0.7638, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 4.3076923076923075, | |
| "grad_norm": 4.377191066741943, | |
| "learning_rate": 1.9195046439628485e-05, | |
| "loss": 0.7751, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 3.5090882778167725, | |
| "learning_rate": 1.9108280254777068e-05, | |
| "loss": 0.731, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 4.923076923076923, | |
| "grad_norm": 4.811877727508545, | |
| "learning_rate": 1.9016393442622952e-05, | |
| "loss": 0.7118, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.822802543640137, | |
| "learning_rate": 1.891891891891892e-05, | |
| "loss": 0.6149, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 5.3076923076923075, | |
| "grad_norm": 4.164008140563965, | |
| "learning_rate": 1.8815331010452963e-05, | |
| "loss": 0.6936, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 5.615384615384615, | |
| "grad_norm": 3.9381167888641357, | |
| "learning_rate": 1.8705035971223024e-05, | |
| "loss": 0.6502, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 5.923076923076923, | |
| "grad_norm": 3.9260995388031006, | |
| "learning_rate": 1.858736059479554e-05, | |
| "loss": 0.633, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.373617649078369, | |
| "learning_rate": 1.846153846153846e-05, | |
| "loss": 0.4986, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 6.3076923076923075, | |
| "grad_norm": 4.031383514404297, | |
| "learning_rate": 1.8326693227091633e-05, | |
| "loss": 0.6118, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 6.615384615384615, | |
| "grad_norm": 4.3576436042785645, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.5786, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 6.923076923076923, | |
| "grad_norm": 3.389698028564453, | |
| "learning_rate": 1.8025751072961374e-05, | |
| "loss": 0.569, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 3.289379596710205, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 0.3683, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 7.3076923076923075, | |
| "grad_norm": 3.358076333999634, | |
| "learning_rate": 1.7674418604651163e-05, | |
| "loss": 0.5345, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 7.615384615384615, | |
| "grad_norm": 3.419854164123535, | |
| "learning_rate": 1.7475728155339808e-05, | |
| "loss": 0.5051, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 7.923076923076923, | |
| "grad_norm": 3.624562978744507, | |
| "learning_rate": 1.7258883248730966e-05, | |
| "loss": 0.4952, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.478980541229248, | |
| "learning_rate": 1.7021276595744686e-05, | |
| "loss": 0.2747, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 8.307692307692308, | |
| "grad_norm": 3.6138954162597656, | |
| "learning_rate": 1.675977653631285e-05, | |
| "loss": 0.4662, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 8.615384615384615, | |
| "grad_norm": 3.260951042175293, | |
| "learning_rate": 1.647058823529412e-05, | |
| "loss": 0.4351, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 8.923076923076923, | |
| "grad_norm": 4.216782093048096, | |
| "learning_rate": 1.6149068322981367e-05, | |
| "loss": 0.4366, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 3.045259475708008, | |
| "learning_rate": 1.5789473684210526e-05, | |
| "loss": 0.2067, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 9.307692307692308, | |
| "grad_norm": 3.721900463104248, | |
| "learning_rate": 1.5384615384615384e-05, | |
| "loss": 0.3979, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 9.615384615384615, | |
| "grad_norm": 4.223248481750488, | |
| "learning_rate": 1.4925373134328359e-05, | |
| "loss": 0.3831, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 9.923076923076923, | |
| "grad_norm": 4.570195198059082, | |
| "learning_rate": 1.4400000000000001e-05, | |
| "loss": 0.3919, | |
| "memory/device_mem_reserved(gib)": 51.53, | |
| "memory/max_mem_active(gib)": 46.82, | |
| "memory/max_mem_allocated(gib)": 45.88, | |
| "step": 39 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 48, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 12, | |
| "save_steps": 3, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.482969594438615e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |