{ "best_metric": 108.10810810810811, "best_model_checkpoint": "whisper-small-sk-timestamp/checkpoint-10000", "epoch": 63.96588486140725, "eval_steps": 5000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5330490405117271, "grad_norm": 6.951908588409424, "learning_rate": 9.980000000000001e-06, "loss": 0.7674, "step": 500 }, { "epoch": 1.0660980810234542, "grad_norm": 5.4410576820373535, "learning_rate": 9.916134453781513e-06, "loss": 0.6034, "step": 1000 }, { "epoch": 1.5991471215351813, "grad_norm": 6.041841506958008, "learning_rate": 9.832100840336136e-06, "loss": 0.4504, "step": 1500 }, { "epoch": 2.1321961620469083, "grad_norm": 6.625973701477051, "learning_rate": 9.748067226890757e-06, "loss": 0.3987, "step": 2000 }, { "epoch": 2.6652452025586353, "grad_norm": 7.889552593231201, "learning_rate": 9.664201680672269e-06, "loss": 0.258, "step": 2500 }, { "epoch": 3.1982942430703627, "grad_norm": 5.555081367492676, "learning_rate": 9.580168067226892e-06, "loss": 0.213, "step": 3000 }, { "epoch": 3.7313432835820897, "grad_norm": 4.576934337615967, "learning_rate": 9.496134453781513e-06, "loss": 0.127, "step": 3500 }, { "epoch": 4.264392324093817, "grad_norm": 4.3578410148620605, "learning_rate": 9.412100840336135e-06, "loss": 0.0974, "step": 4000 }, { "epoch": 4.797441364605544, "grad_norm": 3.2350175380706787, "learning_rate": 9.328067226890758e-06, "loss": 0.063, "step": 4500 }, { "epoch": 5.330490405117271, "grad_norm": 3.677992820739746, "learning_rate": 9.244033613445379e-06, "loss": 0.0459, "step": 5000 }, { "epoch": 5.330490405117271, "eval_loss": 1.0499684810638428, "eval_runtime": 309.3855, "eval_samples_per_second": 2.033, "eval_steps_per_second": 2.033, "eval_wer": 121.7806041335453, "step": 5000 }, { "epoch": 5.863539445628998, "grad_norm": 6.1787638664245605, "learning_rate": 9.16016806722689e-06, "loss": 0.0359, "step": 5500 }, { "epoch": 6.396588486140725, "grad_norm": 5.001561641693115, "learning_rate": 9.076134453781514e-06, "loss": 0.0277, "step": 6000 }, { "epoch": 6.929637526652452, "grad_norm": 3.8890867233276367, "learning_rate": 8.992100840336135e-06, "loss": 0.0262, "step": 6500 }, { "epoch": 7.462686567164179, "grad_norm": 3.166557788848877, "learning_rate": 8.908067226890758e-06, "loss": 0.0191, "step": 7000 }, { "epoch": 7.995735607675906, "grad_norm": 2.1672661304473877, "learning_rate": 8.824033613445378e-06, "loss": 0.0206, "step": 7500 }, { "epoch": 8.528784648187633, "grad_norm": 2.7271361351013184, "learning_rate": 8.740000000000001e-06, "loss": 0.015, "step": 8000 }, { "epoch": 9.06183368869936, "grad_norm": 2.9740970134735107, "learning_rate": 8.656134453781513e-06, "loss": 0.0162, "step": 8500 }, { "epoch": 9.594882729211088, "grad_norm": 7.9420881271362305, "learning_rate": 8.572100840336134e-06, "loss": 0.0134, "step": 9000 }, { "epoch": 10.127931769722814, "grad_norm": 3.1452889442443848, "learning_rate": 8.488067226890757e-06, "loss": 0.0146, "step": 9500 }, { "epoch": 10.660980810234541, "grad_norm": 3.0432803630828857, "learning_rate": 8.404033613445379e-06, "loss": 0.0121, "step": 10000 }, { "epoch": 10.660980810234541, "eval_loss": 1.180967926979065, "eval_runtime": 301.0478, "eval_samples_per_second": 2.089, "eval_steps_per_second": 2.089, "eval_wer": 108.10810810810811, "step": 10000 }, { "epoch": 11.194029850746269, "grad_norm": 2.229727268218994, "learning_rate": 8.32e-06, "loss": 0.0119, "step": 10500 }, { "epoch": 11.727078891257996, "grad_norm": 4.256361484527588, "learning_rate": 8.235966386554623e-06, "loss": 0.0112, "step": 11000 }, { "epoch": 12.260127931769723, "grad_norm": 1.0416252613067627, "learning_rate": 8.151932773109244e-06, "loss": 0.0106, "step": 11500 }, { "epoch": 12.79317697228145, "grad_norm": 2.5392019748687744, "learning_rate": 8.067899159663867e-06, "loss": 0.0101, "step": 12000 }, { "epoch": 13.326226012793176, "grad_norm": 3.5557861328125, "learning_rate": 7.98403361344538e-06, "loss": 0.0087, "step": 12500 }, { "epoch": 13.859275053304904, "grad_norm": 3.0016672611236572, "learning_rate": 7.900168067226891e-06, "loss": 0.0096, "step": 13000 }, { "epoch": 14.392324093816631, "grad_norm": 4.090542793273926, "learning_rate": 7.816302521008404e-06, "loss": 0.0082, "step": 13500 }, { "epoch": 14.925373134328359, "grad_norm": 3.5574986934661865, "learning_rate": 7.732268907563026e-06, "loss": 0.0085, "step": 14000 }, { "epoch": 15.458422174840086, "grad_norm": 0.5061938166618347, "learning_rate": 7.648235294117647e-06, "loss": 0.0078, "step": 14500 }, { "epoch": 15.991471215351812, "grad_norm": 2.1827197074890137, "learning_rate": 7.564201680672269e-06, "loss": 0.008, "step": 15000 }, { "epoch": 15.991471215351812, "eval_loss": 1.2772396802902222, "eval_runtime": 307.2094, "eval_samples_per_second": 2.047, "eval_steps_per_second": 2.047, "eval_wer": 110.65182829888711, "step": 15000 }, { "epoch": 16.52452025586354, "grad_norm": 1.1337757110595703, "learning_rate": 7.480168067226892e-06, "loss": 0.0066, "step": 15500 }, { "epoch": 17.057569296375267, "grad_norm": 2.044987678527832, "learning_rate": 7.396134453781513e-06, "loss": 0.007, "step": 16000 }, { "epoch": 17.590618336886994, "grad_norm": 2.3151865005493164, "learning_rate": 7.312100840336135e-06, "loss": 0.0059, "step": 16500 }, { "epoch": 18.12366737739872, "grad_norm": 0.7851815223693848, "learning_rate": 7.228067226890757e-06, "loss": 0.0065, "step": 17000 }, { "epoch": 18.65671641791045, "grad_norm": 1.8914296627044678, "learning_rate": 7.144033613445379e-06, "loss": 0.0058, "step": 17500 }, { "epoch": 19.189765458422176, "grad_norm": 1.666257619857788, "learning_rate": 7.06e-06, "loss": 0.0059, "step": 18000 }, { "epoch": 19.722814498933904, "grad_norm": 1.1958131790161133, "learning_rate": 6.975966386554622e-06, "loss": 0.0058, "step": 18500 }, { "epoch": 20.255863539445627, "grad_norm": 2.6292991638183594, "learning_rate": 6.891932773109245e-06, "loss": 0.0046, "step": 19000 }, { "epoch": 20.788912579957355, "grad_norm": 3.294924736022949, "learning_rate": 6.807899159663867e-06, "loss": 0.0048, "step": 19500 }, { "epoch": 21.321961620469082, "grad_norm": 3.993171453475952, "learning_rate": 6.723865546218487e-06, "loss": 0.0049, "step": 20000 }, { "epoch": 21.321961620469082, "eval_loss": 1.311843991279602, "eval_runtime": 310.2024, "eval_samples_per_second": 2.028, "eval_steps_per_second": 2.028, "eval_wer": 110.96979332273449, "step": 20000 }, { "epoch": 21.85501066098081, "grad_norm": 2.863147020339966, "learning_rate": 6.640000000000001e-06, "loss": 0.0044, "step": 20500 }, { "epoch": 22.388059701492537, "grad_norm": 3.074666976928711, "learning_rate": 6.555966386554622e-06, "loss": 0.0045, "step": 21000 }, { "epoch": 22.921108742004265, "grad_norm": 3.2533252239227295, "learning_rate": 6.471932773109244e-06, "loss": 0.0045, "step": 21500 }, { "epoch": 23.454157782515992, "grad_norm": 0.5005853176116943, "learning_rate": 6.387899159663867e-06, "loss": 0.0037, "step": 22000 }, { "epoch": 23.98720682302772, "grad_norm": 0.6059980988502502, "learning_rate": 6.303865546218488e-06, "loss": 0.0044, "step": 22500 }, { "epoch": 24.520255863539447, "grad_norm": 2.694021701812744, "learning_rate": 6.219831932773109e-06, "loss": 0.0035, "step": 23000 }, { "epoch": 25.053304904051174, "grad_norm": 4.914281845092773, "learning_rate": 6.1357983193277316e-06, "loss": 0.0036, "step": 23500 }, { "epoch": 25.5863539445629, "grad_norm": 0.22623255848884583, "learning_rate": 6.051764705882354e-06, "loss": 0.003, "step": 24000 }, { "epoch": 26.119402985074625, "grad_norm": 1.5711487531661987, "learning_rate": 5.967731092436976e-06, "loss": 0.0032, "step": 24500 }, { "epoch": 26.652452025586353, "grad_norm": 0.28046905994415283, "learning_rate": 5.883865546218488e-06, "loss": 0.0035, "step": 25000 }, { "epoch": 26.652452025586353, "eval_loss": 1.3698959350585938, "eval_runtime": 302.8259, "eval_samples_per_second": 2.077, "eval_steps_per_second": 2.077, "eval_wer": 108.10810810810811, "step": 25000 }, { "epoch": 27.18550106609808, "grad_norm": 2.411724090576172, "learning_rate": 5.79983193277311e-06, "loss": 0.0031, "step": 25500 }, { "epoch": 27.718550106609808, "grad_norm": 0.6550215482711792, "learning_rate": 5.715798319327731e-06, "loss": 0.0028, "step": 26000 }, { "epoch": 28.251599147121535, "grad_norm": 1.863209843635559, "learning_rate": 5.631764705882354e-06, "loss": 0.003, "step": 26500 }, { "epoch": 28.784648187633262, "grad_norm": 1.5927518606185913, "learning_rate": 5.547731092436976e-06, "loss": 0.0025, "step": 27000 }, { "epoch": 29.31769722814499, "grad_norm": 0.14862287044525146, "learning_rate": 5.463697478991597e-06, "loss": 0.0024, "step": 27500 }, { "epoch": 29.850746268656717, "grad_norm": 0.7016921043395996, "learning_rate": 5.37983193277311e-06, "loss": 0.0026, "step": 28000 }, { "epoch": 30.383795309168445, "grad_norm": 0.9124877452850342, "learning_rate": 5.295798319327732e-06, "loss": 0.0019, "step": 28500 }, { "epoch": 30.916844349680172, "grad_norm": 0.1633589118719101, "learning_rate": 5.211932773109244e-06, "loss": 0.0025, "step": 29000 }, { "epoch": 31.449893390191896, "grad_norm": 0.8394401669502258, "learning_rate": 5.127899159663866e-06, "loss": 0.0022, "step": 29500 }, { "epoch": 31.982942430703623, "grad_norm": 1.288855791091919, "learning_rate": 5.043865546218488e-06, "loss": 0.0027, "step": 30000 }, { "epoch": 31.982942430703623, "eval_loss": 1.3588825464248657, "eval_runtime": 302.1181, "eval_samples_per_second": 2.082, "eval_steps_per_second": 2.082, "eval_wer": 109.37996820349763, "step": 30000 }, { "epoch": 32.51599147121535, "grad_norm": 0.12793661653995514, "learning_rate": 4.9598319327731096e-06, "loss": 0.002, "step": 30500 }, { "epoch": 33.04904051172708, "grad_norm": 3.8341634273529053, "learning_rate": 4.875798319327732e-06, "loss": 0.002, "step": 31000 }, { "epoch": 33.582089552238806, "grad_norm": 0.32009947299957275, "learning_rate": 4.791764705882353e-06, "loss": 0.0015, "step": 31500 }, { "epoch": 34.11513859275053, "grad_norm": 2.231820583343506, "learning_rate": 4.707731092436975e-06, "loss": 0.0019, "step": 32000 }, { "epoch": 34.64818763326226, "grad_norm": 0.7585958242416382, "learning_rate": 4.623697478991598e-06, "loss": 0.0014, "step": 32500 }, { "epoch": 35.18123667377399, "grad_norm": 0.08880181610584259, "learning_rate": 4.539831932773109e-06, "loss": 0.0015, "step": 33000 }, { "epoch": 35.714285714285715, "grad_norm": 0.26556718349456787, "learning_rate": 4.455966386554622e-06, "loss": 0.0016, "step": 33500 }, { "epoch": 36.24733475479744, "grad_norm": 0.04059808701276779, "learning_rate": 4.371932773109244e-06, "loss": 0.0015, "step": 34000 }, { "epoch": 36.78038379530917, "grad_norm": 0.6713038682937622, "learning_rate": 4.2878991596638655e-06, "loss": 0.0013, "step": 34500 }, { "epoch": 37.3134328358209, "grad_norm": 0.15536823868751526, "learning_rate": 4.203865546218488e-06, "loss": 0.0021, "step": 35000 }, { "epoch": 37.3134328358209, "eval_loss": 1.4547514915466309, "eval_runtime": 304.3882, "eval_samples_per_second": 2.066, "eval_steps_per_second": 2.066, "eval_wer": 108.90302066772655, "step": 35000 }, { "epoch": 37.846481876332625, "grad_norm": 0.045942340046167374, "learning_rate": 4.119831932773109e-06, "loss": 0.0014, "step": 35500 }, { "epoch": 38.37953091684435, "grad_norm": 0.2848321199417114, "learning_rate": 4.035798319327731e-06, "loss": 0.0013, "step": 36000 }, { "epoch": 38.91257995735608, "grad_norm": 0.05061887204647064, "learning_rate": 3.9517647058823536e-06, "loss": 0.001, "step": 36500 }, { "epoch": 39.44562899786781, "grad_norm": 0.20606639981269836, "learning_rate": 3.867899159663866e-06, "loss": 0.0011, "step": 37000 }, { "epoch": 39.97867803837953, "grad_norm": 0.23128168284893036, "learning_rate": 3.7838655462184875e-06, "loss": 0.0011, "step": 37500 }, { "epoch": 40.511727078891255, "grad_norm": 0.08404552936553955, "learning_rate": 3.6998319327731098e-06, "loss": 0.001, "step": 38000 }, { "epoch": 41.04477611940298, "grad_norm": 0.14134559035301208, "learning_rate": 3.615798319327731e-06, "loss": 0.0013, "step": 38500 }, { "epoch": 41.57782515991471, "grad_norm": 0.5000337362289429, "learning_rate": 3.5317647058823534e-06, "loss": 0.0011, "step": 39000 }, { "epoch": 42.11087420042644, "grad_norm": 0.1047857478260994, "learning_rate": 3.4478991596638655e-06, "loss": 0.0008, "step": 39500 }, { "epoch": 42.643923240938165, "grad_norm": 0.8483943343162537, "learning_rate": 3.3638655462184878e-06, "loss": 0.0008, "step": 40000 }, { "epoch": 42.643923240938165, "eval_loss": 1.484838843345642, "eval_runtime": 303.2812, "eval_samples_per_second": 2.074, "eval_steps_per_second": 2.074, "eval_wer": 108.2670906200318, "step": 40000 }, { "epoch": 43.17697228144989, "grad_norm": 0.036445315927267075, "learning_rate": 3.279831932773109e-06, "loss": 0.0007, "step": 40500 }, { "epoch": 43.71002132196162, "grad_norm": 0.026431705802679062, "learning_rate": 3.1957983193277313e-06, "loss": 0.0007, "step": 41000 }, { "epoch": 44.24307036247335, "grad_norm": 0.05968892574310303, "learning_rate": 3.111764705882353e-06, "loss": 0.0008, "step": 41500 }, { "epoch": 44.776119402985074, "grad_norm": 0.2520725429058075, "learning_rate": 3.0278991596638657e-06, "loss": 0.0008, "step": 42000 }, { "epoch": 45.3091684434968, "grad_norm": 0.012703795917332172, "learning_rate": 2.943865546218488e-06, "loss": 0.0005, "step": 42500 }, { "epoch": 45.84221748400853, "grad_norm": 0.07976607233285904, "learning_rate": 2.8598319327731093e-06, "loss": 0.0008, "step": 43000 }, { "epoch": 46.375266524520256, "grad_norm": 0.023941069841384888, "learning_rate": 2.7757983193277316e-06, "loss": 0.0006, "step": 43500 }, { "epoch": 46.908315565031984, "grad_norm": 2.228559732437134, "learning_rate": 2.6917647058823534e-06, "loss": 0.0005, "step": 44000 }, { "epoch": 47.44136460554371, "grad_norm": 0.04434338957071304, "learning_rate": 2.607899159663866e-06, "loss": 0.0004, "step": 44500 }, { "epoch": 47.97441364605544, "grad_norm": 0.08504273742437363, "learning_rate": 2.5238655462184873e-06, "loss": 0.0005, "step": 45000 }, { "epoch": 47.97441364605544, "eval_loss": 1.5823848247528076, "eval_runtime": 304.8566, "eval_samples_per_second": 2.063, "eval_steps_per_second": 2.063, "eval_wer": 108.2670906200318, "step": 45000 }, { "epoch": 48.507462686567166, "grad_norm": 0.06245756521821022, "learning_rate": 2.439831932773109e-06, "loss": 0.0005, "step": 45500 }, { "epoch": 49.04051172707889, "grad_norm": 0.007986600510776043, "learning_rate": 2.3557983193277313e-06, "loss": 0.0002, "step": 46000 }, { "epoch": 49.57356076759062, "grad_norm": 0.005643850192427635, "learning_rate": 2.271764705882353e-06, "loss": 0.0002, "step": 46500 }, { "epoch": 50.10660980810235, "grad_norm": 0.011920343153178692, "learning_rate": 2.187731092436975e-06, "loss": 0.0002, "step": 47000 }, { "epoch": 50.639658848614076, "grad_norm": 0.02385249361395836, "learning_rate": 2.1036974789915967e-06, "loss": 0.0002, "step": 47500 }, { "epoch": 51.172707889125796, "grad_norm": 0.005474976263940334, "learning_rate": 2.0196638655462185e-06, "loss": 0.0003, "step": 48000 }, { "epoch": 51.70575692963752, "grad_norm": 0.024762336164712906, "learning_rate": 1.935798319327731e-06, "loss": 0.0002, "step": 48500 }, { "epoch": 52.23880597014925, "grad_norm": 0.0038718734867870808, "learning_rate": 1.8517647058823531e-06, "loss": 0.0002, "step": 49000 }, { "epoch": 52.77185501066098, "grad_norm": 0.028253620490431786, "learning_rate": 1.7677310924369751e-06, "loss": 0.0002, "step": 49500 }, { "epoch": 53.304904051172706, "grad_norm": 0.004596550948917866, "learning_rate": 1.683697478991597e-06, "loss": 0.0001, "step": 50000 }, { "epoch": 53.304904051172706, "eval_loss": 1.5414237976074219, "eval_runtime": 304.2868, "eval_samples_per_second": 2.067, "eval_steps_per_second": 2.067, "eval_wer": 108.74403815580287, "step": 50000 }, { "epoch": 53.83795309168443, "grad_norm": 0.003750466974452138, "learning_rate": 1.5996638655462185e-06, "loss": 0.0002, "step": 50500 }, { "epoch": 54.37100213219616, "grad_norm": 0.017754871398210526, "learning_rate": 1.5156302521008403e-06, "loss": 0.0003, "step": 51000 }, { "epoch": 54.90405117270789, "grad_norm": 0.010330211371183395, "learning_rate": 1.4315966386554623e-06, "loss": 0.0002, "step": 51500 }, { "epoch": 55.437100213219615, "grad_norm": 0.007948646321892738, "learning_rate": 1.3475630252100841e-06, "loss": 0.0002, "step": 52000 }, { "epoch": 55.97014925373134, "grad_norm": 0.008005546405911446, "learning_rate": 1.263529411764706e-06, "loss": 0.0003, "step": 52500 }, { "epoch": 56.50319829424307, "grad_norm": 0.005044952500611544, "learning_rate": 1.1794957983193277e-06, "loss": 0.0001, "step": 53000 }, { "epoch": 57.0362473347548, "grad_norm": 0.004209898877888918, "learning_rate": 1.0954621848739497e-06, "loss": 0.0001, "step": 53500 }, { "epoch": 57.569296375266525, "grad_norm": 0.002882936969399452, "learning_rate": 1.0114285714285715e-06, "loss": 0.0001, "step": 54000 }, { "epoch": 58.10234541577825, "grad_norm": 0.004143028520047665, "learning_rate": 9.275630252100841e-07, "loss": 0.0001, "step": 54500 }, { "epoch": 58.63539445628998, "grad_norm": 0.002750619314610958, "learning_rate": 8.436974789915966e-07, "loss": 0.0001, "step": 55000 }, { "epoch": 58.63539445628998, "eval_loss": 1.5718265771865845, "eval_runtime": 306.4329, "eval_samples_per_second": 2.053, "eval_steps_per_second": 2.053, "eval_wer": 112.241653418124, "step": 55000 }, { "epoch": 59.16844349680171, "grad_norm": 0.003843628568574786, "learning_rate": 7.596638655462185e-07, "loss": 0.0001, "step": 55500 }, { "epoch": 59.701492537313435, "grad_norm": 0.002774209715425968, "learning_rate": 6.756302521008404e-07, "loss": 0.0001, "step": 56000 }, { "epoch": 60.23454157782516, "grad_norm": 0.0018511614762246609, "learning_rate": 5.915966386554622e-07, "loss": 0.0, "step": 56500 }, { "epoch": 60.76759061833689, "grad_norm": 0.004432315472513437, "learning_rate": 5.075630252100841e-07, "loss": 0.0, "step": 57000 }, { "epoch": 61.30063965884862, "grad_norm": 0.001999880885705352, "learning_rate": 4.235294117647059e-07, "loss": 0.0, "step": 57500 }, { "epoch": 61.833688699360344, "grad_norm": 0.0016811841633170843, "learning_rate": 3.3949579831932776e-07, "loss": 0.0, "step": 58000 }, { "epoch": 62.36673773987207, "grad_norm": 0.0018331070896238089, "learning_rate": 2.554621848739496e-07, "loss": 0.0, "step": 58500 }, { "epoch": 62.89978678038379, "grad_norm": 0.0016816870775073767, "learning_rate": 1.7142857142857146e-07, "loss": 0.0, "step": 59000 }, { "epoch": 63.43283582089552, "grad_norm": 0.001792965573258698, "learning_rate": 8.739495798319328e-08, "loss": 0.0, "step": 59500 }, { "epoch": 63.96588486140725, "grad_norm": 0.0018047387711703777, "learning_rate": 3.3613445378151263e-09, "loss": 0.0, "step": 60000 }, { "epoch": 63.96588486140725, "eval_loss": 1.597470760345459, "eval_runtime": 305.909, "eval_samples_per_second": 2.056, "eval_steps_per_second": 2.056, "eval_wer": 111.44674085850556, "step": 60000 } ], "logging_steps": 500, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 64, "save_steps": 5000, "total_flos": 2.7687835638521856e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }