ipa-whisper-small / trainer_state.json
{
"best_metric": 108.10810810810811,
"best_model_checkpoint": "whisper-small-sk-timestamp/checkpoint-10000",
"epoch": 63.96588486140725,
"eval_steps": 5000,
"global_step": 60000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5330490405117271,
"grad_norm": 6.951908588409424,
"learning_rate": 9.980000000000001e-06,
"loss": 0.7674,
"step": 500
},
{
"epoch": 1.0660980810234542,
"grad_norm": 5.4410576820373535,
"learning_rate": 9.916134453781513e-06,
"loss": 0.6034,
"step": 1000
},
{
"epoch": 1.5991471215351813,
"grad_norm": 6.041841506958008,
"learning_rate": 9.832100840336136e-06,
"loss": 0.4504,
"step": 1500
},
{
"epoch": 2.1321961620469083,
"grad_norm": 6.625973701477051,
"learning_rate": 9.748067226890757e-06,
"loss": 0.3987,
"step": 2000
},
{
"epoch": 2.6652452025586353,
"grad_norm": 7.889552593231201,
"learning_rate": 9.664201680672269e-06,
"loss": 0.258,
"step": 2500
},
{
"epoch": 3.1982942430703627,
"grad_norm": 5.555081367492676,
"learning_rate": 9.580168067226892e-06,
"loss": 0.213,
"step": 3000
},
{
"epoch": 3.7313432835820897,
"grad_norm": 4.576934337615967,
"learning_rate": 9.496134453781513e-06,
"loss": 0.127,
"step": 3500
},
{
"epoch": 4.264392324093817,
"grad_norm": 4.3578410148620605,
"learning_rate": 9.412100840336135e-06,
"loss": 0.0974,
"step": 4000
},
{
"epoch": 4.797441364605544,
"grad_norm": 3.2350175380706787,
"learning_rate": 9.328067226890758e-06,
"loss": 0.063,
"step": 4500
},
{
"epoch": 5.330490405117271,
"grad_norm": 3.677992820739746,
"learning_rate": 9.244033613445379e-06,
"loss": 0.0459,
"step": 5000
},
{
"epoch": 5.330490405117271,
"eval_loss": 1.0499684810638428,
"eval_runtime": 309.3855,
"eval_samples_per_second": 2.033,
"eval_steps_per_second": 2.033,
"eval_wer": 121.7806041335453,
"step": 5000
},
{
"epoch": 5.863539445628998,
"grad_norm": 6.1787638664245605,
"learning_rate": 9.16016806722689e-06,
"loss": 0.0359,
"step": 5500
},
{
"epoch": 6.396588486140725,
"grad_norm": 5.001561641693115,
"learning_rate": 9.076134453781514e-06,
"loss": 0.0277,
"step": 6000
},
{
"epoch": 6.929637526652452,
"grad_norm": 3.8890867233276367,
"learning_rate": 8.992100840336135e-06,
"loss": 0.0262,
"step": 6500
},
{
"epoch": 7.462686567164179,
"grad_norm": 3.166557788848877,
"learning_rate": 8.908067226890758e-06,
"loss": 0.0191,
"step": 7000
},
{
"epoch": 7.995735607675906,
"grad_norm": 2.1672661304473877,
"learning_rate": 8.824033613445378e-06,
"loss": 0.0206,
"step": 7500
},
{
"epoch": 8.528784648187633,
"grad_norm": 2.7271361351013184,
"learning_rate": 8.740000000000001e-06,
"loss": 0.015,
"step": 8000
},
{
"epoch": 9.06183368869936,
"grad_norm": 2.9740970134735107,
"learning_rate": 8.656134453781513e-06,
"loss": 0.0162,
"step": 8500
},
{
"epoch": 9.594882729211088,
"grad_norm": 7.9420881271362305,
"learning_rate": 8.572100840336134e-06,
"loss": 0.0134,
"step": 9000
},
{
"epoch": 10.127931769722814,
"grad_norm": 3.1452889442443848,
"learning_rate": 8.488067226890757e-06,
"loss": 0.0146,
"step": 9500
},
{
"epoch": 10.660980810234541,
"grad_norm": 3.0432803630828857,
"learning_rate": 8.404033613445379e-06,
"loss": 0.0121,
"step": 10000
},
{
"epoch": 10.660980810234541,
"eval_loss": 1.180967926979065,
"eval_runtime": 301.0478,
"eval_samples_per_second": 2.089,
"eval_steps_per_second": 2.089,
"eval_wer": 108.10810810810811,
"step": 10000
},
{
"epoch": 11.194029850746269,
"grad_norm": 2.229727268218994,
"learning_rate": 8.32e-06,
"loss": 0.0119,
"step": 10500
},
{
"epoch": 11.727078891257996,
"grad_norm": 4.256361484527588,
"learning_rate": 8.235966386554623e-06,
"loss": 0.0112,
"step": 11000
},
{
"epoch": 12.260127931769723,
"grad_norm": 1.0416252613067627,
"learning_rate": 8.151932773109244e-06,
"loss": 0.0106,
"step": 11500
},
{
"epoch": 12.79317697228145,
"grad_norm": 2.5392019748687744,
"learning_rate": 8.067899159663867e-06,
"loss": 0.0101,
"step": 12000
},
{
"epoch": 13.326226012793176,
"grad_norm": 3.5557861328125,
"learning_rate": 7.98403361344538e-06,
"loss": 0.0087,
"step": 12500
},
{
"epoch": 13.859275053304904,
"grad_norm": 3.0016672611236572,
"learning_rate": 7.900168067226891e-06,
"loss": 0.0096,
"step": 13000
},
{
"epoch": 14.392324093816631,
"grad_norm": 4.090542793273926,
"learning_rate": 7.816302521008404e-06,
"loss": 0.0082,
"step": 13500
},
{
"epoch": 14.925373134328359,
"grad_norm": 3.5574986934661865,
"learning_rate": 7.732268907563026e-06,
"loss": 0.0085,
"step": 14000
},
{
"epoch": 15.458422174840086,
"grad_norm": 0.5061938166618347,
"learning_rate": 7.648235294117647e-06,
"loss": 0.0078,
"step": 14500
},
{
"epoch": 15.991471215351812,
"grad_norm": 2.1827197074890137,
"learning_rate": 7.564201680672269e-06,
"loss": 0.008,
"step": 15000
},
{
"epoch": 15.991471215351812,
"eval_loss": 1.2772396802902222,
"eval_runtime": 307.2094,
"eval_samples_per_second": 2.047,
"eval_steps_per_second": 2.047,
"eval_wer": 110.65182829888711,
"step": 15000
},
{
"epoch": 16.52452025586354,
"grad_norm": 1.1337757110595703,
"learning_rate": 7.480168067226892e-06,
"loss": 0.0066,
"step": 15500
},
{
"epoch": 17.057569296375267,
"grad_norm": 2.044987678527832,
"learning_rate": 7.396134453781513e-06,
"loss": 0.007,
"step": 16000
},
{
"epoch": 17.590618336886994,
"grad_norm": 2.3151865005493164,
"learning_rate": 7.312100840336135e-06,
"loss": 0.0059,
"step": 16500
},
{
"epoch": 18.12366737739872,
"grad_norm": 0.7851815223693848,
"learning_rate": 7.228067226890757e-06,
"loss": 0.0065,
"step": 17000
},
{
"epoch": 18.65671641791045,
"grad_norm": 1.8914296627044678,
"learning_rate": 7.144033613445379e-06,
"loss": 0.0058,
"step": 17500
},
{
"epoch": 19.189765458422176,
"grad_norm": 1.666257619857788,
"learning_rate": 7.06e-06,
"loss": 0.0059,
"step": 18000
},
{
"epoch": 19.722814498933904,
"grad_norm": 1.1958131790161133,
"learning_rate": 6.975966386554622e-06,
"loss": 0.0058,
"step": 18500
},
{
"epoch": 20.255863539445627,
"grad_norm": 2.6292991638183594,
"learning_rate": 6.891932773109245e-06,
"loss": 0.0046,
"step": 19000
},
{
"epoch": 20.788912579957355,
"grad_norm": 3.294924736022949,
"learning_rate": 6.807899159663867e-06,
"loss": 0.0048,
"step": 19500
},
{
"epoch": 21.321961620469082,
"grad_norm": 3.993171453475952,
"learning_rate": 6.723865546218487e-06,
"loss": 0.0049,
"step": 20000
},
{
"epoch": 21.321961620469082,
"eval_loss": 1.311843991279602,
"eval_runtime": 310.2024,
"eval_samples_per_second": 2.028,
"eval_steps_per_second": 2.028,
"eval_wer": 110.96979332273449,
"step": 20000
},
{
"epoch": 21.85501066098081,
"grad_norm": 2.863147020339966,
"learning_rate": 6.640000000000001e-06,
"loss": 0.0044,
"step": 20500
},
{
"epoch": 22.388059701492537,
"grad_norm": 3.074666976928711,
"learning_rate": 6.555966386554622e-06,
"loss": 0.0045,
"step": 21000
},
{
"epoch": 22.921108742004265,
"grad_norm": 3.2533252239227295,
"learning_rate": 6.471932773109244e-06,
"loss": 0.0045,
"step": 21500
},
{
"epoch": 23.454157782515992,
"grad_norm": 0.5005853176116943,
"learning_rate": 6.387899159663867e-06,
"loss": 0.0037,
"step": 22000
},
{
"epoch": 23.98720682302772,
"grad_norm": 0.6059980988502502,
"learning_rate": 6.303865546218488e-06,
"loss": 0.0044,
"step": 22500
},
{
"epoch": 24.520255863539447,
"grad_norm": 2.694021701812744,
"learning_rate": 6.219831932773109e-06,
"loss": 0.0035,
"step": 23000
},
{
"epoch": 25.053304904051174,
"grad_norm": 4.914281845092773,
"learning_rate": 6.1357983193277316e-06,
"loss": 0.0036,
"step": 23500
},
{
"epoch": 25.5863539445629,
"grad_norm": 0.22623255848884583,
"learning_rate": 6.051764705882354e-06,
"loss": 0.003,
"step": 24000
},
{
"epoch": 26.119402985074625,
"grad_norm": 1.5711487531661987,
"learning_rate": 5.967731092436976e-06,
"loss": 0.0032,
"step": 24500
},
{
"epoch": 26.652452025586353,
"grad_norm": 0.28046905994415283,
"learning_rate": 5.883865546218488e-06,
"loss": 0.0035,
"step": 25000
},
{
"epoch": 26.652452025586353,
"eval_loss": 1.3698959350585938,
"eval_runtime": 302.8259,
"eval_samples_per_second": 2.077,
"eval_steps_per_second": 2.077,
"eval_wer": 108.10810810810811,
"step": 25000
},
{
"epoch": 27.18550106609808,
"grad_norm": 2.411724090576172,
"learning_rate": 5.79983193277311e-06,
"loss": 0.0031,
"step": 25500
},
{
"epoch": 27.718550106609808,
"grad_norm": 0.6550215482711792,
"learning_rate": 5.715798319327731e-06,
"loss": 0.0028,
"step": 26000
},
{
"epoch": 28.251599147121535,
"grad_norm": 1.863209843635559,
"learning_rate": 5.631764705882354e-06,
"loss": 0.003,
"step": 26500
},
{
"epoch": 28.784648187633262,
"grad_norm": 1.5927518606185913,
"learning_rate": 5.547731092436976e-06,
"loss": 0.0025,
"step": 27000
},
{
"epoch": 29.31769722814499,
"grad_norm": 0.14862287044525146,
"learning_rate": 5.463697478991597e-06,
"loss": 0.0024,
"step": 27500
},
{
"epoch": 29.850746268656717,
"grad_norm": 0.7016921043395996,
"learning_rate": 5.37983193277311e-06,
"loss": 0.0026,
"step": 28000
},
{
"epoch": 30.383795309168445,
"grad_norm": 0.9124877452850342,
"learning_rate": 5.295798319327732e-06,
"loss": 0.0019,
"step": 28500
},
{
"epoch": 30.916844349680172,
"grad_norm": 0.1633589118719101,
"learning_rate": 5.211932773109244e-06,
"loss": 0.0025,
"step": 29000
},
{
"epoch": 31.449893390191896,
"grad_norm": 0.8394401669502258,
"learning_rate": 5.127899159663866e-06,
"loss": 0.0022,
"step": 29500
},
{
"epoch": 31.982942430703623,
"grad_norm": 1.288855791091919,
"learning_rate": 5.043865546218488e-06,
"loss": 0.0027,
"step": 30000
},
{
"epoch": 31.982942430703623,
"eval_loss": 1.3588825464248657,
"eval_runtime": 302.1181,
"eval_samples_per_second": 2.082,
"eval_steps_per_second": 2.082,
"eval_wer": 109.37996820349763,
"step": 30000
},
{
"epoch": 32.51599147121535,
"grad_norm": 0.12793661653995514,
"learning_rate": 4.9598319327731096e-06,
"loss": 0.002,
"step": 30500
},
{
"epoch": 33.04904051172708,
"grad_norm": 3.8341634273529053,
"learning_rate": 4.875798319327732e-06,
"loss": 0.002,
"step": 31000
},
{
"epoch": 33.582089552238806,
"grad_norm": 0.32009947299957275,
"learning_rate": 4.791764705882353e-06,
"loss": 0.0015,
"step": 31500
},
{
"epoch": 34.11513859275053,
"grad_norm": 2.231820583343506,
"learning_rate": 4.707731092436975e-06,
"loss": 0.0019,
"step": 32000
},
{
"epoch": 34.64818763326226,
"grad_norm": 0.7585958242416382,
"learning_rate": 4.623697478991598e-06,
"loss": 0.0014,
"step": 32500
},
{
"epoch": 35.18123667377399,
"grad_norm": 0.08880181610584259,
"learning_rate": 4.539831932773109e-06,
"loss": 0.0015,
"step": 33000
},
{
"epoch": 35.714285714285715,
"grad_norm": 0.26556718349456787,
"learning_rate": 4.455966386554622e-06,
"loss": 0.0016,
"step": 33500
},
{
"epoch": 36.24733475479744,
"grad_norm": 0.04059808701276779,
"learning_rate": 4.371932773109244e-06,
"loss": 0.0015,
"step": 34000
},
{
"epoch": 36.78038379530917,
"grad_norm": 0.6713038682937622,
"learning_rate": 4.2878991596638655e-06,
"loss": 0.0013,
"step": 34500
},
{
"epoch": 37.3134328358209,
"grad_norm": 0.15536823868751526,
"learning_rate": 4.203865546218488e-06,
"loss": 0.0021,
"step": 35000
},
{
"epoch": 37.3134328358209,
"eval_loss": 1.4547514915466309,
"eval_runtime": 304.3882,
"eval_samples_per_second": 2.066,
"eval_steps_per_second": 2.066,
"eval_wer": 108.90302066772655,
"step": 35000
},
{
"epoch": 37.846481876332625,
"grad_norm": 0.045942340046167374,
"learning_rate": 4.119831932773109e-06,
"loss": 0.0014,
"step": 35500
},
{
"epoch": 38.37953091684435,
"grad_norm": 0.2848321199417114,
"learning_rate": 4.035798319327731e-06,
"loss": 0.0013,
"step": 36000
},
{
"epoch": 38.91257995735608,
"grad_norm": 0.05061887204647064,
"learning_rate": 3.9517647058823536e-06,
"loss": 0.001,
"step": 36500
},
{
"epoch": 39.44562899786781,
"grad_norm": 0.20606639981269836,
"learning_rate": 3.867899159663866e-06,
"loss": 0.0011,
"step": 37000
},
{
"epoch": 39.97867803837953,
"grad_norm": 0.23128168284893036,
"learning_rate": 3.7838655462184875e-06,
"loss": 0.0011,
"step": 37500
},
{
"epoch": 40.511727078891255,
"grad_norm": 0.08404552936553955,
"learning_rate": 3.6998319327731098e-06,
"loss": 0.001,
"step": 38000
},
{
"epoch": 41.04477611940298,
"grad_norm": 0.14134559035301208,
"learning_rate": 3.615798319327731e-06,
"loss": 0.0013,
"step": 38500
},
{
"epoch": 41.57782515991471,
"grad_norm": 0.5000337362289429,
"learning_rate": 3.5317647058823534e-06,
"loss": 0.0011,
"step": 39000
},
{
"epoch": 42.11087420042644,
"grad_norm": 0.1047857478260994,
"learning_rate": 3.4478991596638655e-06,
"loss": 0.0008,
"step": 39500
},
{
"epoch": 42.643923240938165,
"grad_norm": 0.8483943343162537,
"learning_rate": 3.3638655462184878e-06,
"loss": 0.0008,
"step": 40000
},
{
"epoch": 42.643923240938165,
"eval_loss": 1.484838843345642,
"eval_runtime": 303.2812,
"eval_samples_per_second": 2.074,
"eval_steps_per_second": 2.074,
"eval_wer": 108.2670906200318,
"step": 40000
},
{
"epoch": 43.17697228144989,
"grad_norm": 0.036445315927267075,
"learning_rate": 3.279831932773109e-06,
"loss": 0.0007,
"step": 40500
},
{
"epoch": 43.71002132196162,
"grad_norm": 0.026431705802679062,
"learning_rate": 3.1957983193277313e-06,
"loss": 0.0007,
"step": 41000
},
{
"epoch": 44.24307036247335,
"grad_norm": 0.05968892574310303,
"learning_rate": 3.111764705882353e-06,
"loss": 0.0008,
"step": 41500
},
{
"epoch": 44.776119402985074,
"grad_norm": 0.2520725429058075,
"learning_rate": 3.0278991596638657e-06,
"loss": 0.0008,
"step": 42000
},
{
"epoch": 45.3091684434968,
"grad_norm": 0.012703795917332172,
"learning_rate": 2.943865546218488e-06,
"loss": 0.0005,
"step": 42500
},
{
"epoch": 45.84221748400853,
"grad_norm": 0.07976607233285904,
"learning_rate": 2.8598319327731093e-06,
"loss": 0.0008,
"step": 43000
},
{
"epoch": 46.375266524520256,
"grad_norm": 0.023941069841384888,
"learning_rate": 2.7757983193277316e-06,
"loss": 0.0006,
"step": 43500
},
{
"epoch": 46.908315565031984,
"grad_norm": 2.228559732437134,
"learning_rate": 2.6917647058823534e-06,
"loss": 0.0005,
"step": 44000
},
{
"epoch": 47.44136460554371,
"grad_norm": 0.04434338957071304,
"learning_rate": 2.607899159663866e-06,
"loss": 0.0004,
"step": 44500
},
{
"epoch": 47.97441364605544,
"grad_norm": 0.08504273742437363,
"learning_rate": 2.5238655462184873e-06,
"loss": 0.0005,
"step": 45000
},
{
"epoch": 47.97441364605544,
"eval_loss": 1.5823848247528076,
"eval_runtime": 304.8566,
"eval_samples_per_second": 2.063,
"eval_steps_per_second": 2.063,
"eval_wer": 108.2670906200318,
"step": 45000
},
{
"epoch": 48.507462686567166,
"grad_norm": 0.06245756521821022,
"learning_rate": 2.439831932773109e-06,
"loss": 0.0005,
"step": 45500
},
{
"epoch": 49.04051172707889,
"grad_norm": 0.007986600510776043,
"learning_rate": 2.3557983193277313e-06,
"loss": 0.0002,
"step": 46000
},
{
"epoch": 49.57356076759062,
"grad_norm": 0.005643850192427635,
"learning_rate": 2.271764705882353e-06,
"loss": 0.0002,
"step": 46500
},
{
"epoch": 50.10660980810235,
"grad_norm": 0.011920343153178692,
"learning_rate": 2.187731092436975e-06,
"loss": 0.0002,
"step": 47000
},
{
"epoch": 50.639658848614076,
"grad_norm": 0.02385249361395836,
"learning_rate": 2.1036974789915967e-06,
"loss": 0.0002,
"step": 47500
},
{
"epoch": 51.172707889125796,
"grad_norm": 0.005474976263940334,
"learning_rate": 2.0196638655462185e-06,
"loss": 0.0003,
"step": 48000
},
{
"epoch": 51.70575692963752,
"grad_norm": 0.024762336164712906,
"learning_rate": 1.935798319327731e-06,
"loss": 0.0002,
"step": 48500
},
{
"epoch": 52.23880597014925,
"grad_norm": 0.0038718734867870808,
"learning_rate": 1.8517647058823531e-06,
"loss": 0.0002,
"step": 49000
},
{
"epoch": 52.77185501066098,
"grad_norm": 0.028253620490431786,
"learning_rate": 1.7677310924369751e-06,
"loss": 0.0002,
"step": 49500
},
{
"epoch": 53.304904051172706,
"grad_norm": 0.004596550948917866,
"learning_rate": 1.683697478991597e-06,
"loss": 0.0001,
"step": 50000
},
{
"epoch": 53.304904051172706,
"eval_loss": 1.5414237976074219,
"eval_runtime": 304.2868,
"eval_samples_per_second": 2.067,
"eval_steps_per_second": 2.067,
"eval_wer": 108.74403815580287,
"step": 50000
},
{
"epoch": 53.83795309168443,
"grad_norm": 0.003750466974452138,
"learning_rate": 1.5996638655462185e-06,
"loss": 0.0002,
"step": 50500
},
{
"epoch": 54.37100213219616,
"grad_norm": 0.017754871398210526,
"learning_rate": 1.5156302521008403e-06,
"loss": 0.0003,
"step": 51000
},
{
"epoch": 54.90405117270789,
"grad_norm": 0.010330211371183395,
"learning_rate": 1.4315966386554623e-06,
"loss": 0.0002,
"step": 51500
},
{
"epoch": 55.437100213219615,
"grad_norm": 0.007948646321892738,
"learning_rate": 1.3475630252100841e-06,
"loss": 0.0002,
"step": 52000
},
{
"epoch": 55.97014925373134,
"grad_norm": 0.008005546405911446,
"learning_rate": 1.263529411764706e-06,
"loss": 0.0003,
"step": 52500
},
{
"epoch": 56.50319829424307,
"grad_norm": 0.005044952500611544,
"learning_rate": 1.1794957983193277e-06,
"loss": 0.0001,
"step": 53000
},
{
"epoch": 57.0362473347548,
"grad_norm": 0.004209898877888918,
"learning_rate": 1.0954621848739497e-06,
"loss": 0.0001,
"step": 53500
},
{
"epoch": 57.569296375266525,
"grad_norm": 0.002882936969399452,
"learning_rate": 1.0114285714285715e-06,
"loss": 0.0001,
"step": 54000
},
{
"epoch": 58.10234541577825,
"grad_norm": 0.004143028520047665,
"learning_rate": 9.275630252100841e-07,
"loss": 0.0001,
"step": 54500
},
{
"epoch": 58.63539445628998,
"grad_norm": 0.002750619314610958,
"learning_rate": 8.436974789915966e-07,
"loss": 0.0001,
"step": 55000
},
{
"epoch": 58.63539445628998,
"eval_loss": 1.5718265771865845,
"eval_runtime": 306.4329,
"eval_samples_per_second": 2.053,
"eval_steps_per_second": 2.053,
"eval_wer": 112.241653418124,
"step": 55000
},
{
"epoch": 59.16844349680171,
"grad_norm": 0.003843628568574786,
"learning_rate": 7.596638655462185e-07,
"loss": 0.0001,
"step": 55500
},
{
"epoch": 59.701492537313435,
"grad_norm": 0.002774209715425968,
"learning_rate": 6.756302521008404e-07,
"loss": 0.0001,
"step": 56000
},
{
"epoch": 60.23454157782516,
"grad_norm": 0.0018511614762246609,
"learning_rate": 5.915966386554622e-07,
"loss": 0.0,
"step": 56500
},
{
"epoch": 60.76759061833689,
"grad_norm": 0.004432315472513437,
"learning_rate": 5.075630252100841e-07,
"loss": 0.0,
"step": 57000
},
{
"epoch": 61.30063965884862,
"grad_norm": 0.001999880885705352,
"learning_rate": 4.235294117647059e-07,
"loss": 0.0,
"step": 57500
},
{
"epoch": 61.833688699360344,
"grad_norm": 0.0016811841633170843,
"learning_rate": 3.3949579831932776e-07,
"loss": 0.0,
"step": 58000
},
{
"epoch": 62.36673773987207,
"grad_norm": 0.0018331070896238089,
"learning_rate": 2.554621848739496e-07,
"loss": 0.0,
"step": 58500
},
{
"epoch": 62.89978678038379,
"grad_norm": 0.0016816870775073767,
"learning_rate": 1.7142857142857146e-07,
"loss": 0.0,
"step": 59000
},
{
"epoch": 63.43283582089552,
"grad_norm": 0.001792965573258698,
"learning_rate": 8.739495798319328e-08,
"loss": 0.0,
"step": 59500
},
{
"epoch": 63.96588486140725,
"grad_norm": 0.0018047387711703777,
"learning_rate": 3.3613445378151263e-09,
"loss": 0.0,
"step": 60000
},
{
"epoch": 63.96588486140725,
"eval_loss": 1.597470760345459,
"eval_runtime": 305.909,
"eval_samples_per_second": 2.056,
"eval_steps_per_second": 2.056,
"eval_wer": 111.44674085850556,
"step": 60000
}
],
"logging_steps": 500,
"max_steps": 60000,
"num_input_tokens_seen": 0,
"num_train_epochs": 64,
"save_steps": 5000,
"total_flos": 2.7687835638521856e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
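
The JSON above is the standard `trainer_state.json` written by the Hugging Face `Trainer`: `log_history` interleaves training logs (every `logging_steps` = 500 steps, with `loss`, `grad_norm`, `learning_rate`) and evaluation logs (every `eval_steps` = 5000 steps, with `eval_loss` and `eval_wer`). A minimal sketch for inspecting it is shown below; it assumes the file is saved locally as `trainer_state.json` (the path is an assumption for illustration, not part of the original log).

```python
# Minimal sketch: summarize the eval history recorded in trainer_state.json.
# Assumes the JSON above is saved locally as "trainer_state.json"; the file
# path is an assumption, not something stated in the original log.
import json

with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Entries containing "eval_loss" are evaluation logs (every eval_steps=5000);
# the remaining entries are training logs (every logging_steps=500).
evals = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best WER {state['best_metric']:.2f} at {state['best_model_checkpoint']}")
for e in evals:
    print(f"step {e['step']:>6}  epoch {e['epoch']:6.2f}  "
          f"eval_loss {e['eval_loss']:.4f}  WER {e['eval_wer']:.2f}")
```

Reading the eval entries this way makes the trend easy to see: `eval_wer` reaches its minimum (108.11) at step 10000, which is why `best_model_checkpoint` points to `checkpoint-10000`, while `eval_loss` keeps rising through step 60000, a pattern consistent with overfitting in the later epochs.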