{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9784172661870504,
  "eval_steps": 500,
  "global_step": 34,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02877697841726619,
      "grad_norm": 1.6665316820144653,
      "learning_rate": 4.989335440737586e-05,
      "loss": 1.4719,
      "num_input_tokens_seen": 2097152,
      "step": 1
    },
    {
      "epoch": 0.05755395683453238,
      "grad_norm": 1.6307742595672607,
      "learning_rate": 4.957432749209755e-05,
      "loss": 1.4237,
      "num_input_tokens_seen": 4194304,
      "step": 2
    },
    {
      "epoch": 0.08633093525179857,
      "grad_norm": 1.2694746255874634,
      "learning_rate": 4.9045641079320484e-05,
      "loss": 1.3171,
      "num_input_tokens_seen": 6291456,
      "step": 3
    },
    {
      "epoch": 0.11510791366906475,
      "grad_norm": 1.123619794845581,
      "learning_rate": 4.8311805735108894e-05,
      "loss": 1.262,
      "num_input_tokens_seen": 8388608,
      "step": 4
    },
    {
      "epoch": 0.14388489208633093,
      "grad_norm": 0.9699582457542419,
      "learning_rate": 4.7379082283876566e-05,
      "loss": 1.2216,
      "num_input_tokens_seen": 10485760,
      "step": 5
    },
    {
      "epoch": 0.17266187050359713,
      "grad_norm": 0.7419703602790833,
      "learning_rate": 4.625542839324036e-05,
      "loss": 1.1564,
      "num_input_tokens_seen": 12582912,
      "step": 6
    },
    {
      "epoch": 0.2014388489208633,
      "grad_norm": 0.6002612113952637,
      "learning_rate": 4.4950430682006e-05,
      "loss": 1.1289,
      "num_input_tokens_seen": 14680064,
      "step": 7
    },
    {
      "epoch": 0.2302158273381295,
      "grad_norm": 0.4983214735984802,
      "learning_rate": 4.347522293051648e-05,
      "loss": 1.0862,
      "num_input_tokens_seen": 16777216,
      "step": 8
    },
    {
      "epoch": 0.2589928057553957,
      "grad_norm": 0.4308398962020874,
      "learning_rate": 4.184239109116393e-05,
      "loss": 1.0842,
      "num_input_tokens_seen": 18874368,
      "step": 9
    },
    {
      "epoch": 0.28776978417266186,
      "grad_norm": 0.35480186343193054,
      "learning_rate": 4.0065865909481417e-05,
      "loss": 1.0345,
      "num_input_tokens_seen": 20971520,
      "step": 10
    },
    {
      "epoch": 0.31654676258992803,
      "grad_norm": 0.3167465627193451,
      "learning_rate": 3.81608040719339e-05,
      "loss": 1.0473,
      "num_input_tokens_seen": 23068672,
      "step": 11
    },
    {
      "epoch": 0.34532374100719426,
      "grad_norm": 0.2900119423866272,
      "learning_rate": 3.6143458894413465e-05,
      "loss": 1.0355,
      "num_input_tokens_seen": 25165824,
      "step": 12
    },
    {
      "epoch": 0.37410071942446044,
      "grad_norm": 0.2785671651363373,
      "learning_rate": 3.403104165467883e-05,
      "loss": 1.0341,
      "num_input_tokens_seen": 27262976,
      "step": 13
    },
    {
      "epoch": 0.4028776978417266,
      "grad_norm": 0.24713565409183502,
      "learning_rate": 3.1841574751802076e-05,
      "loss": 0.9967,
      "num_input_tokens_seen": 29360128,
      "step": 14
    },
    {
      "epoch": 0.4316546762589928,
      "grad_norm": 0.2206040620803833,
      "learning_rate": 2.9593737945414264e-05,
      "loss": 0.9901,
      "num_input_tokens_seen": 31457280,
      "step": 15
    },
    {
      "epoch": 0.460431654676259,
      "grad_norm": 0.2218572199344635,
      "learning_rate": 2.7306708986582553e-05,
      "loss": 0.9894,
      "num_input_tokens_seen": 33554432,
      "step": 16
    },
    {
      "epoch": 0.4892086330935252,
      "grad_norm": 0.1956264078617096,
      "learning_rate": 2.5e-05,
      "loss": 0.9596,
      "num_input_tokens_seen": 35651584,
      "step": 17
    },
    {
      "epoch": 0.5179856115107914,
      "grad_norm": 0.2028598040342331,
      "learning_rate": 2.2693291013417453e-05,
      "loss": 0.9862,
      "num_input_tokens_seen": 37748736,
      "step": 18
    },
    {
      "epoch": 0.5467625899280576,
      "grad_norm": 0.19911186397075653,
      "learning_rate": 2.0406262054585738e-05,
      "loss": 0.9911,
      "num_input_tokens_seen": 39845888,
      "step": 19
    },
    {
      "epoch": 0.5755395683453237,
      "grad_norm": 0.17156538367271423,
      "learning_rate": 1.815842524819793e-05,
      "loss": 0.95,
      "num_input_tokens_seen": 41943040,
      "step": 20
    },
    {
      "epoch": 0.60431654676259,
      "grad_norm": 0.1637849360704422,
      "learning_rate": 1.5968958345321178e-05,
      "loss": 0.9235,
      "num_input_tokens_seen": 44040192,
      "step": 21
    },
    {
      "epoch": 0.6330935251798561,
      "grad_norm": 0.1640578806400299,
      "learning_rate": 1.3856541105586545e-05,
      "loss": 0.952,
      "num_input_tokens_seen": 46137344,
      "step": 22
    },
    {
      "epoch": 0.6618705035971223,
      "grad_norm": 0.15264524519443512,
      "learning_rate": 1.1839195928066102e-05,
      "loss": 0.9199,
      "num_input_tokens_seen": 48234496,
      "step": 23
    },
    {
      "epoch": 0.6906474820143885,
      "grad_norm": 0.15897633135318756,
      "learning_rate": 9.934134090518593e-06,
      "loss": 0.9399,
      "num_input_tokens_seen": 50331648,
      "step": 24
    },
    {
      "epoch": 0.7194244604316546,
      "grad_norm": 0.1521052122116089,
      "learning_rate": 8.15760890883607e-06,
      "loss": 0.9307,
      "num_input_tokens_seen": 52428800,
      "step": 25
    },
    {
      "epoch": 0.7482014388489209,
      "grad_norm": 0.15674357116222382,
      "learning_rate": 6.524777069483526e-06,
      "loss": 0.9579,
      "num_input_tokens_seen": 54525952,
      "step": 26
    },
    {
      "epoch": 0.7769784172661871,
      "grad_norm": 0.1539985090494156,
      "learning_rate": 5.049569317994013e-06,
      "loss": 0.9388,
      "num_input_tokens_seen": 56623104,
      "step": 27
    },
    {
      "epoch": 0.8057553956834532,
      "grad_norm": 0.14510491490364075,
      "learning_rate": 3.7445716067596503e-06,
      "loss": 0.9241,
      "num_input_tokens_seen": 58720256,
      "step": 28
    },
    {
      "epoch": 0.8345323741007195,
      "grad_norm": 0.15253807604312897,
      "learning_rate": 2.6209177161234445e-06,
      "loss": 0.9275,
      "num_input_tokens_seen": 60817408,
      "step": 29
    },
    {
      "epoch": 0.8633093525179856,
      "grad_norm": 0.15724140405654907,
      "learning_rate": 1.6881942648911076e-06,
      "loss": 0.9295,
      "num_input_tokens_seen": 62914560,
      "step": 30
    },
    {
      "epoch": 0.8920863309352518,
      "grad_norm": 0.14930537343025208,
      "learning_rate": 9.54358920679524e-07,
      "loss": 0.9255,
      "num_input_tokens_seen": 65011712,
      "step": 31
    },
    {
      "epoch": 0.920863309352518,
      "grad_norm": 0.14510829746723175,
      "learning_rate": 4.256725079024554e-07,
      "loss": 0.9277,
      "num_input_tokens_seen": 67108864,
      "step": 32
    },
    {
      "epoch": 0.9496402877697842,
      "grad_norm": 0.14531178772449493,
      "learning_rate": 1.0664559262413831e-07,
      "loss": 0.9263,
      "num_input_tokens_seen": 69206016,
      "step": 33
    },
    {
      "epoch": 0.9784172661870504,
      "grad_norm": 0.1455848217010498,
      "learning_rate": 0.0,
      "loss": 0.9213,
      "num_input_tokens_seen": 71303168,
      "step": 34
    },
    {
      "epoch": 0.9784172661870504,
      "num_input_tokens_seen": 71303168,
      "step": 34,
      "total_flos": 3.1553472363893883e+18,
      "train_loss": 1.0356257505276625,
      "train_runtime": 5786.0102,
      "train_samples_per_second": 3.069,
      "train_steps_per_second": 0.006
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 34,
  "num_input_tokens_seen": 71303168,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1553472363893883e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}