{
  "best_metric": 108.10810810810811,
  "best_model_checkpoint": "whisper-small-sk-timestamp/checkpoint-10000",
  "epoch": 63.96588486140725,
  "eval_steps": 5000,
  "global_step": 60000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5330490405117271,
      "grad_norm": 6.951908588409424,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.7674,
      "step": 500
    },
    {
      "epoch": 1.0660980810234542,
      "grad_norm": 5.4410576820373535,
      "learning_rate": 9.916134453781513e-06,
      "loss": 0.6034,
      "step": 1000
    },
    {
      "epoch": 1.5991471215351813,
      "grad_norm": 6.041841506958008,
      "learning_rate": 9.832100840336136e-06,
      "loss": 0.4504,
      "step": 1500
    },
    {
      "epoch": 2.1321961620469083,
      "grad_norm": 6.625973701477051,
      "learning_rate": 9.748067226890757e-06,
      "loss": 0.3987,
      "step": 2000
    },
    {
      "epoch": 2.6652452025586353,
      "grad_norm": 7.889552593231201,
      "learning_rate": 9.664201680672269e-06,
      "loss": 0.258,
      "step": 2500
    },
    {
      "epoch": 3.1982942430703627,
      "grad_norm": 5.555081367492676,
      "learning_rate": 9.580168067226892e-06,
      "loss": 0.213,
      "step": 3000
    },
    {
      "epoch": 3.7313432835820897,
      "grad_norm": 4.576934337615967,
      "learning_rate": 9.496134453781513e-06,
      "loss": 0.127,
      "step": 3500
    },
    {
      "epoch": 4.264392324093817,
      "grad_norm": 4.3578410148620605,
      "learning_rate": 9.412100840336135e-06,
      "loss": 0.0974,
      "step": 4000
    },
    {
      "epoch": 4.797441364605544,
      "grad_norm": 3.2350175380706787,
      "learning_rate": 9.328067226890758e-06,
      "loss": 0.063,
      "step": 4500
    },
    {
      "epoch": 5.330490405117271,
      "grad_norm": 3.677992820739746,
      "learning_rate": 9.244033613445379e-06,
      "loss": 0.0459,
      "step": 5000
    },
    {
      "epoch": 5.330490405117271,
      "eval_loss": 1.0499684810638428,
      "eval_runtime": 309.3855,
      "eval_samples_per_second": 2.033,
      "eval_steps_per_second": 2.033,
      "eval_wer": 121.7806041335453,
      "step": 5000
    },
    {
      "epoch": 5.863539445628998,
      "grad_norm": 6.1787638664245605,
      "learning_rate": 9.16016806722689e-06,
      "loss": 0.0359,
      "step": 5500
    },
    {
      "epoch": 6.396588486140725,
      "grad_norm": 5.001561641693115,
      "learning_rate": 9.076134453781514e-06,
      "loss": 0.0277,
      "step": 6000
    },
    {
      "epoch": 6.929637526652452,
      "grad_norm": 3.8890867233276367,
      "learning_rate": 8.992100840336135e-06,
      "loss": 0.0262,
      "step": 6500
    },
    {
      "epoch": 7.462686567164179,
      "grad_norm": 3.166557788848877,
      "learning_rate": 8.908067226890758e-06,
      "loss": 0.0191,
      "step": 7000
    },
    {
      "epoch": 7.995735607675906,
      "grad_norm": 2.1672661304473877,
      "learning_rate": 8.824033613445378e-06,
      "loss": 0.0206,
      "step": 7500
    },
    {
      "epoch": 8.528784648187633,
      "grad_norm": 2.7271361351013184,
      "learning_rate": 8.740000000000001e-06,
      "loss": 0.015,
      "step": 8000
    },
    {
      "epoch": 9.06183368869936,
      "grad_norm": 2.9740970134735107,
      "learning_rate": 8.656134453781513e-06,
      "loss": 0.0162,
      "step": 8500
    },
    {
      "epoch": 9.594882729211088,
      "grad_norm": 7.9420881271362305,
      "learning_rate": 8.572100840336134e-06,
      "loss": 0.0134,
      "step": 9000
    },
    {
      "epoch": 10.127931769722814,
      "grad_norm": 3.1452889442443848,
      "learning_rate": 8.488067226890757e-06,
      "loss": 0.0146,
      "step": 9500
    },
    {
      "epoch": 10.660980810234541,
      "grad_norm": 3.0432803630828857,
      "learning_rate": 8.404033613445379e-06,
      "loss": 0.0121,
      "step": 10000
    },
    {
      "epoch": 10.660980810234541,
      "eval_loss": 1.180967926979065,
      "eval_runtime": 301.0478,
      "eval_samples_per_second": 2.089,
      "eval_steps_per_second": 2.089,
      "eval_wer": 108.10810810810811,
      "step": 10000
    },
    {
      "epoch": 11.194029850746269,
      "grad_norm": 2.229727268218994,
      "learning_rate": 8.32e-06,
      "loss": 0.0119,
      "step": 10500
    },
    {
      "epoch": 11.727078891257996,
      "grad_norm": 4.256361484527588,
      "learning_rate": 8.235966386554623e-06,
      "loss": 0.0112,
      "step": 11000
    },
    {
      "epoch": 12.260127931769723,
      "grad_norm": 1.0416252613067627,
      "learning_rate": 8.151932773109244e-06,
      "loss": 0.0106,
      "step": 11500
    },
    {
      "epoch": 12.79317697228145,
      "grad_norm": 2.5392019748687744,
      "learning_rate": 8.067899159663867e-06,
      "loss": 0.0101,
      "step": 12000
    },
    {
      "epoch": 13.326226012793176,
      "grad_norm": 3.5557861328125,
      "learning_rate": 7.98403361344538e-06,
      "loss": 0.0087,
      "step": 12500
    },
    {
      "epoch": 13.859275053304904,
      "grad_norm": 3.0016672611236572,
      "learning_rate": 7.900168067226891e-06,
      "loss": 0.0096,
      "step": 13000
    },
    {
      "epoch": 14.392324093816631,
      "grad_norm": 4.090542793273926,
      "learning_rate": 7.816302521008404e-06,
      "loss": 0.0082,
      "step": 13500
    },
    {
      "epoch": 14.925373134328359,
      "grad_norm": 3.5574986934661865,
      "learning_rate": 7.732268907563026e-06,
      "loss": 0.0085,
      "step": 14000
    },
    {
      "epoch": 15.458422174840086,
      "grad_norm": 0.5061938166618347,
      "learning_rate": 7.648235294117647e-06,
      "loss": 0.0078,
      "step": 14500
    },
    {
      "epoch": 15.991471215351812,
      "grad_norm": 2.1827197074890137,
      "learning_rate": 7.564201680672269e-06,
      "loss": 0.008,
      "step": 15000
    },
    {
      "epoch": 15.991471215351812,
      "eval_loss": 1.2772396802902222,
      "eval_runtime": 307.2094,
      "eval_samples_per_second": 2.047,
      "eval_steps_per_second": 2.047,
      "eval_wer": 110.65182829888711,
      "step": 15000
    },
    {
      "epoch": 16.52452025586354,
      "grad_norm": 1.1337757110595703,
      "learning_rate": 7.480168067226892e-06,
      "loss": 0.0066,
      "step": 15500
    },
    {
      "epoch": 17.057569296375267,
      "grad_norm": 2.044987678527832,
      "learning_rate": 7.396134453781513e-06,
      "loss": 0.007,
      "step": 16000
    },
    {
      "epoch": 17.590618336886994,
      "grad_norm": 2.3151865005493164,
      "learning_rate": 7.312100840336135e-06,
      "loss": 0.0059,
      "step": 16500
    },
    {
      "epoch": 18.12366737739872,
      "grad_norm": 0.7851815223693848,
      "learning_rate": 7.228067226890757e-06,
      "loss": 0.0065,
      "step": 17000
    },
    {
      "epoch": 18.65671641791045,
      "grad_norm": 1.8914296627044678,
      "learning_rate": 7.144033613445379e-06,
      "loss": 0.0058,
      "step": 17500
    },
    {
      "epoch": 19.189765458422176,
      "grad_norm": 1.666257619857788,
      "learning_rate": 7.06e-06,
      "loss": 0.0059,
      "step": 18000
    },
    {
      "epoch": 19.722814498933904,
      "grad_norm": 1.1958131790161133,
      "learning_rate": 6.975966386554622e-06,
      "loss": 0.0058,
      "step": 18500
    },
    {
      "epoch": 20.255863539445627,
      "grad_norm": 2.6292991638183594,
      "learning_rate": 6.891932773109245e-06,
      "loss": 0.0046,
      "step": 19000
    },
    {
      "epoch": 20.788912579957355,
      "grad_norm": 3.294924736022949,
      "learning_rate": 6.807899159663867e-06,
      "loss": 0.0048,
      "step": 19500
    },
    {
      "epoch": 21.321961620469082,
      "grad_norm": 3.993171453475952,
      "learning_rate": 6.723865546218487e-06,
      "loss": 0.0049,
      "step": 20000
    },
    {
      "epoch": 21.321961620469082,
      "eval_loss": 1.311843991279602,
      "eval_runtime": 310.2024,
      "eval_samples_per_second": 2.028,
      "eval_steps_per_second": 2.028,
      "eval_wer": 110.96979332273449,
      "step": 20000
    },
    {
      "epoch": 21.85501066098081,
      "grad_norm": 2.863147020339966,
      "learning_rate": 6.640000000000001e-06,
      "loss": 0.0044,
      "step": 20500
    },
    {
      "epoch": 22.388059701492537,
      "grad_norm": 3.074666976928711,
      "learning_rate": 6.555966386554622e-06,
      "loss": 0.0045,
      "step": 21000
    },
    {
      "epoch": 22.921108742004265,
      "grad_norm": 3.2533252239227295,
      "learning_rate": 6.471932773109244e-06,
      "loss": 0.0045,
      "step": 21500
    },
    {
      "epoch": 23.454157782515992,
      "grad_norm": 0.5005853176116943,
      "learning_rate": 6.387899159663867e-06,
      "loss": 0.0037,
      "step": 22000
    },
    {
      "epoch": 23.98720682302772,
      "grad_norm": 0.6059980988502502,
      "learning_rate": 6.303865546218488e-06,
      "loss": 0.0044,
      "step": 22500
    },
    {
      "epoch": 24.520255863539447,
      "grad_norm": 2.694021701812744,
      "learning_rate": 6.219831932773109e-06,
      "loss": 0.0035,
      "step": 23000
    },
    {
      "epoch": 25.053304904051174,
      "grad_norm": 4.914281845092773,
      "learning_rate": 6.1357983193277316e-06,
      "loss": 0.0036,
      "step": 23500
    },
    {
      "epoch": 25.5863539445629,
      "grad_norm": 0.22623255848884583,
      "learning_rate": 6.051764705882354e-06,
      "loss": 0.003,
      "step": 24000
    },
    {
      "epoch": 26.119402985074625,
      "grad_norm": 1.5711487531661987,
      "learning_rate": 5.967731092436976e-06,
      "loss": 0.0032,
      "step": 24500
    },
    {
      "epoch": 26.652452025586353,
      "grad_norm": 0.28046905994415283,
      "learning_rate": 5.883865546218488e-06,
      "loss": 0.0035,
      "step": 25000
    },
    {
      "epoch": 26.652452025586353,
      "eval_loss": 1.3698959350585938,
      "eval_runtime": 302.8259,
      "eval_samples_per_second": 2.077,
      "eval_steps_per_second": 2.077,
      "eval_wer": 108.10810810810811,
      "step": 25000
    },
    {
      "epoch": 27.18550106609808,
      "grad_norm": 2.411724090576172,
      "learning_rate": 5.79983193277311e-06,
      "loss": 0.0031,
      "step": 25500
    },
    {
      "epoch": 27.718550106609808,
      "grad_norm": 0.6550215482711792,
      "learning_rate": 5.715798319327731e-06,
      "loss": 0.0028,
      "step": 26000
    },
    {
      "epoch": 28.251599147121535,
      "grad_norm": 1.863209843635559,
      "learning_rate": 5.631764705882354e-06,
      "loss": 0.003,
      "step": 26500
    },
    {
      "epoch": 28.784648187633262,
      "grad_norm": 1.5927518606185913,
      "learning_rate": 5.547731092436976e-06,
      "loss": 0.0025,
      "step": 27000
    },
    {
      "epoch": 29.31769722814499,
      "grad_norm": 0.14862287044525146,
      "learning_rate": 5.463697478991597e-06,
      "loss": 0.0024,
      "step": 27500
    },
    {
      "epoch": 29.850746268656717,
      "grad_norm": 0.7016921043395996,
      "learning_rate": 5.37983193277311e-06,
      "loss": 0.0026,
      "step": 28000
    },
    {
      "epoch": 30.383795309168445,
      "grad_norm": 0.9124877452850342,
      "learning_rate": 5.295798319327732e-06,
      "loss": 0.0019,
      "step": 28500
    },
    {
      "epoch": 30.916844349680172,
      "grad_norm": 0.1633589118719101,
      "learning_rate": 5.211932773109244e-06,
      "loss": 0.0025,
      "step": 29000
    },
    {
      "epoch": 31.449893390191896,
      "grad_norm": 0.8394401669502258,
      "learning_rate": 5.127899159663866e-06,
      "loss": 0.0022,
      "step": 29500
    },
    {
      "epoch": 31.982942430703623,
      "grad_norm": 1.288855791091919,
      "learning_rate": 5.043865546218488e-06,
      "loss": 0.0027,
      "step": 30000
    },
    {
      "epoch": 31.982942430703623,
      "eval_loss": 1.3588825464248657,
      "eval_runtime": 302.1181,
      "eval_samples_per_second": 2.082,
      "eval_steps_per_second": 2.082,
      "eval_wer": 109.37996820349763,
      "step": 30000
    },
    {
      "epoch": 32.51599147121535,
      "grad_norm": 0.12793661653995514,
      "learning_rate": 4.9598319327731096e-06,
      "loss": 0.002,
      "step": 30500
    },
    {
      "epoch": 33.04904051172708,
      "grad_norm": 3.8341634273529053,
      "learning_rate": 4.875798319327732e-06,
      "loss": 0.002,
      "step": 31000
    },
    {
      "epoch": 33.582089552238806,
      "grad_norm": 0.32009947299957275,
      "learning_rate": 4.791764705882353e-06,
      "loss": 0.0015,
      "step": 31500
    },
    {
      "epoch": 34.11513859275053,
      "grad_norm": 2.231820583343506,
      "learning_rate": 4.707731092436975e-06,
      "loss": 0.0019,
      "step": 32000
    },
    {
      "epoch": 34.64818763326226,
      "grad_norm": 0.7585958242416382,
      "learning_rate": 4.623697478991598e-06,
      "loss": 0.0014,
      "step": 32500
    },
    {
      "epoch": 35.18123667377399,
      "grad_norm": 0.08880181610584259,
      "learning_rate": 4.539831932773109e-06,
      "loss": 0.0015,
      "step": 33000
    },
    {
      "epoch": 35.714285714285715,
      "grad_norm": 0.26556718349456787,
      "learning_rate": 4.455966386554622e-06,
      "loss": 0.0016,
      "step": 33500
    },
    {
      "epoch": 36.24733475479744,
      "grad_norm": 0.04059808701276779,
      "learning_rate": 4.371932773109244e-06,
      "loss": 0.0015,
      "step": 34000
    },
    {
      "epoch": 36.78038379530917,
      "grad_norm": 0.6713038682937622,
      "learning_rate": 4.2878991596638655e-06,
      "loss": 0.0013,
      "step": 34500
    },
    {
      "epoch": 37.3134328358209,
      "grad_norm": 0.15536823868751526,
      "learning_rate": 4.203865546218488e-06,
      "loss": 0.0021,
      "step": 35000
    },
    {
      "epoch": 37.3134328358209,
      "eval_loss": 1.4547514915466309,
      "eval_runtime": 304.3882,
      "eval_samples_per_second": 2.066,
      "eval_steps_per_second": 2.066,
      "eval_wer": 108.90302066772655,
      "step": 35000
    },
    {
      "epoch": 37.846481876332625,
      "grad_norm": 0.045942340046167374,
      "learning_rate": 4.119831932773109e-06,
      "loss": 0.0014,
      "step": 35500
    },
    {
      "epoch": 38.37953091684435,
      "grad_norm": 0.2848321199417114,
      "learning_rate": 4.035798319327731e-06,
      "loss": 0.0013,
      "step": 36000
    },
    {
      "epoch": 38.91257995735608,
      "grad_norm": 0.05061887204647064,
      "learning_rate": 3.9517647058823536e-06,
      "loss": 0.001,
      "step": 36500
    },
    {
      "epoch": 39.44562899786781,
      "grad_norm": 0.20606639981269836,
      "learning_rate": 3.867899159663866e-06,
      "loss": 0.0011,
      "step": 37000
    },
    {
      "epoch": 39.97867803837953,
      "grad_norm": 0.23128168284893036,
      "learning_rate": 3.7838655462184875e-06,
      "loss": 0.0011,
      "step": 37500
    },
    {
      "epoch": 40.511727078891255,
      "grad_norm": 0.08404552936553955,
      "learning_rate": 3.6998319327731098e-06,
      "loss": 0.001,
      "step": 38000
    },
    {
      "epoch": 41.04477611940298,
      "grad_norm": 0.14134559035301208,
      "learning_rate": 3.615798319327731e-06,
      "loss": 0.0013,
      "step": 38500
    },
    {
      "epoch": 41.57782515991471,
      "grad_norm": 0.5000337362289429,
      "learning_rate": 3.5317647058823534e-06,
      "loss": 0.0011,
      "step": 39000
    },
    {
      "epoch": 42.11087420042644,
      "grad_norm": 0.1047857478260994,
      "learning_rate": 3.4478991596638655e-06,
      "loss": 0.0008,
      "step": 39500
    },
    {
      "epoch": 42.643923240938165,
      "grad_norm": 0.8483943343162537,
      "learning_rate": 3.3638655462184878e-06,
      "loss": 0.0008,
      "step": 40000
    },
    {
      "epoch": 42.643923240938165,
      "eval_loss": 1.484838843345642,
      "eval_runtime": 303.2812,
      "eval_samples_per_second": 2.074,
      "eval_steps_per_second": 2.074,
      "eval_wer": 108.2670906200318,
      "step": 40000
    },
    {
      "epoch": 43.17697228144989,
      "grad_norm": 0.036445315927267075,
      "learning_rate": 3.279831932773109e-06,
      "loss": 0.0007,
      "step": 40500
    },
    {
      "epoch": 43.71002132196162,
      "grad_norm": 0.026431705802679062,
      "learning_rate": 3.1957983193277313e-06,
      "loss": 0.0007,
      "step": 41000
    },
    {
      "epoch": 44.24307036247335,
      "grad_norm": 0.05968892574310303,
      "learning_rate": 3.111764705882353e-06,
      "loss": 0.0008,
      "step": 41500
    },
    {
      "epoch": 44.776119402985074,
      "grad_norm": 0.2520725429058075,
      "learning_rate": 3.0278991596638657e-06,
      "loss": 0.0008,
      "step": 42000
    },
    {
      "epoch": 45.3091684434968,
      "grad_norm": 0.012703795917332172,
      "learning_rate": 2.943865546218488e-06,
      "loss": 0.0005,
      "step": 42500
    },
    {
      "epoch": 45.84221748400853,
      "grad_norm": 0.07976607233285904,
      "learning_rate": 2.8598319327731093e-06,
      "loss": 0.0008,
      "step": 43000
    },
    {
      "epoch": 46.375266524520256,
      "grad_norm": 0.023941069841384888,
      "learning_rate": 2.7757983193277316e-06,
      "loss": 0.0006,
      "step": 43500
    },
    {
      "epoch": 46.908315565031984,
      "grad_norm": 2.228559732437134,
      "learning_rate": 2.6917647058823534e-06,
      "loss": 0.0005,
      "step": 44000
    },
    {
      "epoch": 47.44136460554371,
      "grad_norm": 0.04434338957071304,
      "learning_rate": 2.607899159663866e-06,
      "loss": 0.0004,
      "step": 44500
    },
    {
      "epoch": 47.97441364605544,
      "grad_norm": 0.08504273742437363,
      "learning_rate": 2.5238655462184873e-06,
      "loss": 0.0005,
      "step": 45000
    },
    {
      "epoch": 47.97441364605544,
      "eval_loss": 1.5823848247528076,
      "eval_runtime": 304.8566,
      "eval_samples_per_second": 2.063,
      "eval_steps_per_second": 2.063,
      "eval_wer": 108.2670906200318,
      "step": 45000
    },
    {
      "epoch": 48.507462686567166,
      "grad_norm": 0.06245756521821022,
      "learning_rate": 2.439831932773109e-06,
      "loss": 0.0005,
      "step": 45500
    },
    {
      "epoch": 49.04051172707889,
      "grad_norm": 0.007986600510776043,
      "learning_rate": 2.3557983193277313e-06,
      "loss": 0.0002,
      "step": 46000
    },
    {
      "epoch": 49.57356076759062,
      "grad_norm": 0.005643850192427635,
      "learning_rate": 2.271764705882353e-06,
      "loss": 0.0002,
      "step": 46500
    },
    {
      "epoch": 50.10660980810235,
      "grad_norm": 0.011920343153178692,
      "learning_rate": 2.187731092436975e-06,
      "loss": 0.0002,
      "step": 47000
    },
    {
      "epoch": 50.639658848614076,
      "grad_norm": 0.02385249361395836,
      "learning_rate": 2.1036974789915967e-06,
      "loss": 0.0002,
      "step": 47500
    },
    {
      "epoch": 51.172707889125796,
      "grad_norm": 0.005474976263940334,
      "learning_rate": 2.0196638655462185e-06,
      "loss": 0.0003,
      "step": 48000
    },
    {
      "epoch": 51.70575692963752,
      "grad_norm": 0.024762336164712906,
      "learning_rate": 1.935798319327731e-06,
      "loss": 0.0002,
      "step": 48500
    },
    {
      "epoch": 52.23880597014925,
      "grad_norm": 0.0038718734867870808,
      "learning_rate": 1.8517647058823531e-06,
      "loss": 0.0002,
      "step": 49000
    },
    {
      "epoch": 52.77185501066098,
      "grad_norm": 0.028253620490431786,
      "learning_rate": 1.7677310924369751e-06,
      "loss": 0.0002,
      "step": 49500
    },
    {
      "epoch": 53.304904051172706,
      "grad_norm": 0.004596550948917866,
      "learning_rate": 1.683697478991597e-06,
      "loss": 0.0001,
      "step": 50000
    },
    {
      "epoch": 53.304904051172706,
      "eval_loss": 1.5414237976074219,
      "eval_runtime": 304.2868,
      "eval_samples_per_second": 2.067,
      "eval_steps_per_second": 2.067,
      "eval_wer": 108.74403815580287,
      "step": 50000
    },
    {
      "epoch": 53.83795309168443,
      "grad_norm": 0.003750466974452138,
      "learning_rate": 1.5996638655462185e-06,
      "loss": 0.0002,
      "step": 50500
    },
    {
      "epoch": 54.37100213219616,
      "grad_norm": 0.017754871398210526,
      "learning_rate": 1.5156302521008403e-06,
      "loss": 0.0003,
      "step": 51000
    },
    {
      "epoch": 54.90405117270789,
      "grad_norm": 0.010330211371183395,
      "learning_rate": 1.4315966386554623e-06,
      "loss": 0.0002,
      "step": 51500
    },
    {
      "epoch": 55.437100213219615,
      "grad_norm": 0.007948646321892738,
      "learning_rate": 1.3475630252100841e-06,
      "loss": 0.0002,
      "step": 52000
    },
    {
      "epoch": 55.97014925373134,
      "grad_norm": 0.008005546405911446,
      "learning_rate": 1.263529411764706e-06,
      "loss": 0.0003,
      "step": 52500
    },
    {
      "epoch": 56.50319829424307,
      "grad_norm": 0.005044952500611544,
      "learning_rate": 1.1794957983193277e-06,
      "loss": 0.0001,
      "step": 53000
    },
    {
      "epoch": 57.0362473347548,
      "grad_norm": 0.004209898877888918,
      "learning_rate": 1.0954621848739497e-06,
      "loss": 0.0001,
      "step": 53500
    },
    {
      "epoch": 57.569296375266525,
      "grad_norm": 0.002882936969399452,
      "learning_rate": 1.0114285714285715e-06,
      "loss": 0.0001,
      "step": 54000
    },
    {
      "epoch": 58.10234541577825,
      "grad_norm": 0.004143028520047665,
      "learning_rate": 9.275630252100841e-07,
      "loss": 0.0001,
      "step": 54500
    },
    {
      "epoch": 58.63539445628998,
      "grad_norm": 0.002750619314610958,
      "learning_rate": 8.436974789915966e-07,
      "loss": 0.0001,
      "step": 55000
    },
    {
      "epoch": 58.63539445628998,
      "eval_loss": 1.5718265771865845,
      "eval_runtime": 306.4329,
      "eval_samples_per_second": 2.053,
      "eval_steps_per_second": 2.053,
      "eval_wer": 112.241653418124,
      "step": 55000
    },
    {
      "epoch": 59.16844349680171,
      "grad_norm": 0.003843628568574786,
      "learning_rate": 7.596638655462185e-07,
      "loss": 0.0001,
      "step": 55500
    },
    {
      "epoch": 59.701492537313435,
      "grad_norm": 0.002774209715425968,
      "learning_rate": 6.756302521008404e-07,
      "loss": 0.0001,
      "step": 56000
    },
    {
      "epoch": 60.23454157782516,
      "grad_norm": 0.0018511614762246609,
      "learning_rate": 5.915966386554622e-07,
      "loss": 0.0,
      "step": 56500
    },
    {
      "epoch": 60.76759061833689,
      "grad_norm": 0.004432315472513437,
      "learning_rate": 5.075630252100841e-07,
      "loss": 0.0,
      "step": 57000
    },
    {
      "epoch": 61.30063965884862,
      "grad_norm": 0.001999880885705352,
      "learning_rate": 4.235294117647059e-07,
      "loss": 0.0,
      "step": 57500
    },
    {
      "epoch": 61.833688699360344,
      "grad_norm": 0.0016811841633170843,
      "learning_rate": 3.3949579831932776e-07,
      "loss": 0.0,
      "step": 58000
    },
    {
      "epoch": 62.36673773987207,
      "grad_norm": 0.0018331070896238089,
      "learning_rate": 2.554621848739496e-07,
      "loss": 0.0,
      "step": 58500
    },
    {
      "epoch": 62.89978678038379,
      "grad_norm": 0.0016816870775073767,
      "learning_rate": 1.7142857142857146e-07,
      "loss": 0.0,
      "step": 59000
    },
    {
      "epoch": 63.43283582089552,
      "grad_norm": 0.001792965573258698,
      "learning_rate": 8.739495798319328e-08,
      "loss": 0.0,
      "step": 59500
    },
    {
      "epoch": 63.96588486140725,
      "grad_norm": 0.0018047387711703777,
      "learning_rate": 3.3613445378151263e-09,
      "loss": 0.0,
      "step": 60000
    },
    {
      "epoch": 63.96588486140725,
      "eval_loss": 1.597470760345459,
      "eval_runtime": 305.909,
      "eval_samples_per_second": 2.056,
      "eval_steps_per_second": 2.056,
      "eval_wer": 111.44674085850556,
      "step": 60000
    }
  ],
  "logging_steps": 500,
  "max_steps": 60000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 64,
  "save_steps": 5000,
  "total_flos": 2.7687835638521856e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}