finetuned_models/bloomz7b_couplets_5e/trainer_state.json

13962 lines
277 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 23160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.194244604316547e-07,
"loss": 5.0794,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.4388489208633094e-06,
"loss": 5.0596,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 2.1582733812949645e-06,
"loss": 5.0769,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 2.877697841726619e-06,
"loss": 5.069,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 3.5971223021582732e-06,
"loss": 4.9247,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.316546762589929e-06,
"loss": 4.8425,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 5.035971223021583e-06,
"loss": 4.8177,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 5.755395683453238e-06,
"loss": 4.7713,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 6.474820143884892e-06,
"loss": 4.5874,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 7.1942446043165465e-06,
"loss": 4.4,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 7.913669064748202e-06,
"loss": 4.2331,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 8.633093525179858e-06,
"loss": 3.9907,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 9.352517985611512e-06,
"loss": 3.832,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 1.0071942446043167e-05,
"loss": 3.6927,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 1.0791366906474821e-05,
"loss": 3.6372,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 1.1510791366906475e-05,
"loss": 3.6589,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 1.223021582733813e-05,
"loss": 3.5743,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 1.2949640287769784e-05,
"loss": 3.5936,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 1.366906474820144e-05,
"loss": 3.5464,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 1.4388489208633093e-05,
"loss": 3.5314,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 1.5107913669064749e-05,
"loss": 3.5154,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 1.5827338129496403e-05,
"loss": 3.4843,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 1.6546762589928058e-05,
"loss": 3.5982,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 1.7266187050359716e-05,
"loss": 3.5369,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 1.7985611510791367e-05,
"loss": 3.468,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 1.8705035971223024e-05,
"loss": 3.4998,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 1.942446043165468e-05,
"loss": 3.5115,
"step": 270
},
{
"epoch": 0.06,
"learning_rate": 2.0143884892086333e-05,
"loss": 3.5545,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 2.0863309352517988e-05,
"loss": 3.4808,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 2.1582733812949642e-05,
"loss": 3.4455,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 2.2302158273381296e-05,
"loss": 3.4575,
"step": 310
},
{
"epoch": 0.07,
"learning_rate": 2.302158273381295e-05,
"loss": 3.4486,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 2.3741007194244605e-05,
"loss": 3.3872,
"step": 330
},
{
"epoch": 0.07,
"learning_rate": 2.446043165467626e-05,
"loss": 3.4609,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 2.5179856115107914e-05,
"loss": 3.4621,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 2.589928057553957e-05,
"loss": 3.4752,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 2.6618705035971226e-05,
"loss": 3.4315,
"step": 370
},
{
"epoch": 0.08,
"learning_rate": 2.733812949640288e-05,
"loss": 3.3985,
"step": 380
},
{
"epoch": 0.08,
"learning_rate": 2.805755395683453e-05,
"loss": 3.4633,
"step": 390
},
{
"epoch": 0.09,
"learning_rate": 2.8776978417266186e-05,
"loss": 3.4653,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 2.9496402877697844e-05,
"loss": 3.4259,
"step": 410
},
{
"epoch": 0.09,
"learning_rate": 3.0215827338129498e-05,
"loss": 3.4427,
"step": 420
},
{
"epoch": 0.09,
"learning_rate": 3.0935251798561156e-05,
"loss": 3.4442,
"step": 430
},
{
"epoch": 0.09,
"learning_rate": 3.165467625899281e-05,
"loss": 3.4401,
"step": 440
},
{
"epoch": 0.1,
"learning_rate": 3.237410071942446e-05,
"loss": 3.3566,
"step": 450
},
{
"epoch": 0.1,
"learning_rate": 3.3093525179856116e-05,
"loss": 3.3678,
"step": 460
},
{
"epoch": 0.1,
"learning_rate": 3.3812949640287773e-05,
"loss": 3.3995,
"step": 470
},
{
"epoch": 0.1,
"learning_rate": 3.453237410071943e-05,
"loss": 3.4305,
"step": 480
},
{
"epoch": 0.11,
"learning_rate": 3.5251798561151075e-05,
"loss": 3.3855,
"step": 490
},
{
"epoch": 0.11,
"learning_rate": 3.597122302158273e-05,
"loss": 3.421,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 3.669064748201439e-05,
"loss": 3.3819,
"step": 510
},
{
"epoch": 0.11,
"learning_rate": 3.741007194244605e-05,
"loss": 3.3863,
"step": 520
},
{
"epoch": 0.11,
"learning_rate": 3.81294964028777e-05,
"loss": 3.4173,
"step": 530
},
{
"epoch": 0.12,
"learning_rate": 3.884892086330936e-05,
"loss": 3.3894,
"step": 540
},
{
"epoch": 0.12,
"learning_rate": 3.956834532374101e-05,
"loss": 3.4352,
"step": 550
},
{
"epoch": 0.12,
"learning_rate": 4.0287769784172666e-05,
"loss": 3.4051,
"step": 560
},
{
"epoch": 0.12,
"learning_rate": 4.100719424460432e-05,
"loss": 3.4108,
"step": 570
},
{
"epoch": 0.13,
"learning_rate": 4.1726618705035975e-05,
"loss": 3.3722,
"step": 580
},
{
"epoch": 0.13,
"learning_rate": 4.244604316546763e-05,
"loss": 3.3759,
"step": 590
},
{
"epoch": 0.13,
"learning_rate": 4.3165467625899284e-05,
"loss": 3.421,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 4.3884892086330935e-05,
"loss": 3.4149,
"step": 610
},
{
"epoch": 0.13,
"learning_rate": 4.460431654676259e-05,
"loss": 3.3518,
"step": 620
},
{
"epoch": 0.14,
"learning_rate": 4.532374100719425e-05,
"loss": 3.408,
"step": 630
},
{
"epoch": 0.14,
"learning_rate": 4.60431654676259e-05,
"loss": 3.334,
"step": 640
},
{
"epoch": 0.14,
"learning_rate": 4.676258992805755e-05,
"loss": 3.389,
"step": 650
},
{
"epoch": 0.14,
"learning_rate": 4.748201438848921e-05,
"loss": 3.3713,
"step": 660
},
{
"epoch": 0.14,
"learning_rate": 4.820143884892087e-05,
"loss": 3.3702,
"step": 670
},
{
"epoch": 0.15,
"learning_rate": 4.892086330935252e-05,
"loss": 3.4156,
"step": 680
},
{
"epoch": 0.15,
"learning_rate": 4.964028776978418e-05,
"loss": 3.4023,
"step": 690
},
{
"epoch": 0.15,
"learning_rate": 4.999999388865356e-05,
"loss": 3.3626,
"step": 700
},
{
"epoch": 0.15,
"learning_rate": 4.9999944997899994e-05,
"loss": 3.3751,
"step": 710
},
{
"epoch": 0.16,
"learning_rate": 4.999984721648846e-05,
"loss": 3.3962,
"step": 720
},
{
"epoch": 0.16,
"learning_rate": 4.9999700544610196e-05,
"loss": 3.3803,
"step": 730
},
{
"epoch": 0.16,
"learning_rate": 4.999950498255203e-05,
"loss": 3.3778,
"step": 740
},
{
"epoch": 0.16,
"learning_rate": 4.999926053069641e-05,
"loss": 3.3897,
"step": 750
},
{
"epoch": 0.16,
"learning_rate": 4.99989671895214e-05,
"loss": 3.4078,
"step": 760
},
{
"epoch": 0.17,
"learning_rate": 4.9998624959600656e-05,
"loss": 3.3542,
"step": 770
},
{
"epoch": 0.17,
"learning_rate": 4.999823384160347e-05,
"loss": 3.3488,
"step": 780
},
{
"epoch": 0.17,
"learning_rate": 4.999779383629471e-05,
"loss": 3.3988,
"step": 790
},
{
"epoch": 0.17,
"learning_rate": 4.999730494453487e-05,
"loss": 3.3759,
"step": 800
},
{
"epoch": 0.17,
"learning_rate": 4.9996767167280034e-05,
"loss": 3.3742,
"step": 810
},
{
"epoch": 0.18,
"learning_rate": 4.9996180505581904e-05,
"loss": 3.3853,
"step": 820
},
{
"epoch": 0.18,
"learning_rate": 4.999554496058777e-05,
"loss": 3.3318,
"step": 830
},
{
"epoch": 0.18,
"learning_rate": 4.9994860533540526e-05,
"loss": 3.3575,
"step": 840
},
{
"epoch": 0.18,
"learning_rate": 4.999412722577866e-05,
"loss": 3.3645,
"step": 850
},
{
"epoch": 0.19,
"learning_rate": 4.999334503873624e-05,
"loss": 3.3681,
"step": 860
},
{
"epoch": 0.19,
"learning_rate": 4.9992513973942954e-05,
"loss": 3.3345,
"step": 870
},
{
"epoch": 0.19,
"learning_rate": 4.9991634033024036e-05,
"loss": 3.385,
"step": 880
},
{
"epoch": 0.19,
"learning_rate": 4.999070521770034e-05,
"loss": 3.343,
"step": 890
},
{
"epoch": 0.19,
"learning_rate": 4.998972752978828e-05,
"loss": 3.3678,
"step": 900
},
{
"epoch": 0.2,
"learning_rate": 4.998870097119985e-05,
"loss": 3.349,
"step": 910
},
{
"epoch": 0.2,
"learning_rate": 4.9987625543942635e-05,
"loss": 3.3943,
"step": 920
},
{
"epoch": 0.2,
"learning_rate": 4.998650125011975e-05,
"loss": 3.3375,
"step": 930
},
{
"epoch": 0.2,
"learning_rate": 4.9985328091929916e-05,
"loss": 3.4051,
"step": 940
},
{
"epoch": 0.21,
"learning_rate": 4.9984106071667394e-05,
"loss": 3.3285,
"step": 950
},
{
"epoch": 0.21,
"learning_rate": 4.9982835191722e-05,
"loss": 3.3466,
"step": 960
},
{
"epoch": 0.21,
"learning_rate": 4.9981515454579107e-05,
"loss": 3.3341,
"step": 970
},
{
"epoch": 0.21,
"learning_rate": 4.9980146862819634e-05,
"loss": 3.3003,
"step": 980
},
{
"epoch": 0.21,
"learning_rate": 4.997872941912004e-05,
"loss": 3.3813,
"step": 990
},
{
"epoch": 0.22,
"learning_rate": 4.9977263126252326e-05,
"loss": 3.3458,
"step": 1000
},
{
"epoch": 0.22,
"learning_rate": 4.997574798708401e-05,
"loss": 3.3217,
"step": 1010
},
{
"epoch": 0.22,
"learning_rate": 4.997418400457815e-05,
"loss": 3.3223,
"step": 1020
},
{
"epoch": 0.22,
"learning_rate": 4.997257118179332e-05,
"loss": 3.3682,
"step": 1030
},
{
"epoch": 0.22,
"learning_rate": 4.99709095218836e-05,
"loss": 3.3303,
"step": 1040
},
{
"epoch": 0.23,
"learning_rate": 4.99691990280986e-05,
"loss": 3.3583,
"step": 1050
},
{
"epoch": 0.23,
"learning_rate": 4.996743970378338e-05,
"loss": 3.3536,
"step": 1060
},
{
"epoch": 0.23,
"learning_rate": 4.9965631552378564e-05,
"loss": 3.363,
"step": 1070
},
{
"epoch": 0.23,
"learning_rate": 4.9963774577420205e-05,
"loss": 3.3257,
"step": 1080
},
{
"epoch": 0.24,
"learning_rate": 4.996186878253988e-05,
"loss": 3.3474,
"step": 1090
},
{
"epoch": 0.24,
"learning_rate": 4.9959914171464596e-05,
"loss": 3.3786,
"step": 1100
},
{
"epoch": 0.24,
"learning_rate": 4.995791074801687e-05,
"loss": 3.3219,
"step": 1110
},
{
"epoch": 0.24,
"learning_rate": 4.995585851611464e-05,
"loss": 3.3586,
"step": 1120
},
{
"epoch": 0.24,
"learning_rate": 4.995375747977133e-05,
"loss": 3.3308,
"step": 1130
},
{
"epoch": 0.25,
"learning_rate": 4.9951607643095796e-05,
"loss": 3.2885,
"step": 1140
},
{
"epoch": 0.25,
"learning_rate": 4.99494090102923e-05,
"loss": 3.3731,
"step": 1150
},
{
"epoch": 0.25,
"learning_rate": 4.994716158566058e-05,
"loss": 3.3311,
"step": 1160
},
{
"epoch": 0.25,
"learning_rate": 4.994486537359575e-05,
"loss": 3.3108,
"step": 1170
},
{
"epoch": 0.25,
"learning_rate": 4.994252037858836e-05,
"loss": 3.2939,
"step": 1180
},
{
"epoch": 0.26,
"learning_rate": 4.994012660522436e-05,
"loss": 3.3468,
"step": 1190
},
{
"epoch": 0.26,
"learning_rate": 4.993768405818508e-05,
"loss": 3.3452,
"step": 1200
},
{
"epoch": 0.26,
"learning_rate": 4.993519274224724e-05,
"loss": 3.4028,
"step": 1210
},
{
"epoch": 0.26,
"learning_rate": 4.993265266228293e-05,
"loss": 3.3021,
"step": 1220
},
{
"epoch": 0.27,
"learning_rate": 4.993006382325962e-05,
"loss": 3.3746,
"step": 1230
},
{
"epoch": 0.27,
"learning_rate": 4.992742623024011e-05,
"loss": 3.3682,
"step": 1240
},
{
"epoch": 0.27,
"learning_rate": 4.992473988838257e-05,
"loss": 3.3018,
"step": 1250
},
{
"epoch": 0.27,
"learning_rate": 4.9922004802940476e-05,
"loss": 3.3312,
"step": 1260
},
{
"epoch": 0.27,
"learning_rate": 4.991922097926266e-05,
"loss": 3.2805,
"step": 1270
},
{
"epoch": 0.28,
"learning_rate": 4.9916388422793246e-05,
"loss": 3.329,
"step": 1280
},
{
"epoch": 0.28,
"learning_rate": 4.991350713907167e-05,
"loss": 3.339,
"step": 1290
},
{
"epoch": 0.28,
"learning_rate": 4.991057713373266e-05,
"loss": 3.3383,
"step": 1300
},
{
"epoch": 0.28,
"learning_rate": 4.9907598412506215e-05,
"loss": 3.3469,
"step": 1310
},
{
"epoch": 0.28,
"learning_rate": 4.9904570981217624e-05,
"loss": 3.3102,
"step": 1320
},
{
"epoch": 0.29,
"learning_rate": 4.990149484578742e-05,
"loss": 3.2872,
"step": 1330
},
{
"epoch": 0.29,
"learning_rate": 4.98983700122314e-05,
"loss": 3.3041,
"step": 1340
},
{
"epoch": 0.29,
"learning_rate": 4.989519648666056e-05,
"loss": 3.362,
"step": 1350
},
{
"epoch": 0.29,
"learning_rate": 4.989197427528116e-05,
"loss": 3.2895,
"step": 1360
},
{
"epoch": 0.3,
"learning_rate": 4.9888703384394655e-05,
"loss": 3.3185,
"step": 1370
},
{
"epoch": 0.3,
"learning_rate": 4.988538382039769e-05,
"loss": 3.3665,
"step": 1380
},
{
"epoch": 0.3,
"learning_rate": 4.988201558978212e-05,
"loss": 3.2901,
"step": 1390
},
{
"epoch": 0.3,
"learning_rate": 4.9878598699134945e-05,
"loss": 3.3005,
"step": 1400
},
{
"epoch": 0.3,
"learning_rate": 4.9875133155138357e-05,
"loss": 3.2858,
"step": 1410
},
{
"epoch": 0.31,
"learning_rate": 4.9871618964569666e-05,
"loss": 3.3412,
"step": 1420
},
{
"epoch": 0.31,
"learning_rate": 4.9868056134301333e-05,
"loss": 3.2808,
"step": 1430
},
{
"epoch": 0.31,
"learning_rate": 4.986444467130095e-05,
"loss": 3.2828,
"step": 1440
},
{
"epoch": 0.31,
"learning_rate": 4.9860784582631184e-05,
"loss": 3.3237,
"step": 1450
},
{
"epoch": 0.32,
"learning_rate": 4.985707587544983e-05,
"loss": 3.3013,
"step": 1460
},
{
"epoch": 0.32,
"learning_rate": 4.9853318557009756e-05,
"loss": 3.3343,
"step": 1470
},
{
"epoch": 0.32,
"learning_rate": 4.984951263465887e-05,
"loss": 3.301,
"step": 1480
},
{
"epoch": 0.32,
"learning_rate": 4.9845658115840166e-05,
"loss": 3.3194,
"step": 1490
},
{
"epoch": 0.32,
"learning_rate": 4.984175500809165e-05,
"loss": 3.3356,
"step": 1500
},
{
"epoch": 0.33,
"learning_rate": 4.9837803319046365e-05,
"loss": 3.2681,
"step": 1510
},
{
"epoch": 0.33,
"learning_rate": 4.983380305643235e-05,
"loss": 3.2769,
"step": 1520
},
{
"epoch": 0.33,
"learning_rate": 4.982975422807265e-05,
"loss": 3.3332,
"step": 1530
},
{
"epoch": 0.33,
"learning_rate": 4.982565684188526e-05,
"loss": 3.2726,
"step": 1540
},
{
"epoch": 0.33,
"learning_rate": 4.982151090588318e-05,
"loss": 3.2455,
"step": 1550
},
{
"epoch": 0.34,
"learning_rate": 4.981731642817431e-05,
"loss": 3.2954,
"step": 1560
},
{
"epoch": 0.34,
"learning_rate": 4.9813073416961494e-05,
"loss": 3.2877,
"step": 1570
},
{
"epoch": 0.34,
"learning_rate": 4.980878188054252e-05,
"loss": 3.287,
"step": 1580
},
{
"epoch": 0.34,
"learning_rate": 4.980444182731002e-05,
"loss": 3.3168,
"step": 1590
},
{
"epoch": 0.35,
"learning_rate": 4.9800053265751555e-05,
"loss": 3.3364,
"step": 1600
},
{
"epoch": 0.35,
"learning_rate": 4.979561620444952e-05,
"loss": 3.3212,
"step": 1610
},
{
"epoch": 0.35,
"learning_rate": 4.979113065208117e-05,
"loss": 3.2759,
"step": 1620
},
{
"epoch": 0.35,
"learning_rate": 4.978659661741859e-05,
"loss": 3.2946,
"step": 1630
},
{
"epoch": 0.35,
"learning_rate": 4.9782014109328674e-05,
"loss": 3.3021,
"step": 1640
},
{
"epoch": 0.36,
"learning_rate": 4.977738313677312e-05,
"loss": 3.3222,
"step": 1650
},
{
"epoch": 0.36,
"learning_rate": 4.97727037088084e-05,
"loss": 3.2956,
"step": 1660
},
{
"epoch": 0.36,
"learning_rate": 4.976797583458573e-05,
"loss": 3.3278,
"step": 1670
},
{
"epoch": 0.36,
"learning_rate": 4.976319952335112e-05,
"loss": 3.3005,
"step": 1680
},
{
"epoch": 0.36,
"learning_rate": 4.975837478444524e-05,
"loss": 3.314,
"step": 1690
},
{
"epoch": 0.37,
"learning_rate": 4.975350162730351e-05,
"loss": 3.2943,
"step": 1700
},
{
"epoch": 0.37,
"learning_rate": 4.974858006145602e-05,
"loss": 3.2535,
"step": 1710
},
{
"epoch": 0.37,
"learning_rate": 4.974361009652753e-05,
"loss": 3.2621,
"step": 1720
},
{
"epoch": 0.37,
"learning_rate": 4.9738591742237476e-05,
"loss": 3.2934,
"step": 1730
},
{
"epoch": 0.38,
"learning_rate": 4.9733525008399886e-05,
"loss": 3.2551,
"step": 1740
},
{
"epoch": 0.38,
"learning_rate": 4.972840990492342e-05,
"loss": 3.3217,
"step": 1750
},
{
"epoch": 0.38,
"learning_rate": 4.9723246441811346e-05,
"loss": 3.275,
"step": 1760
},
{
"epoch": 0.38,
"learning_rate": 4.971803462916148e-05,
"loss": 3.3001,
"step": 1770
},
{
"epoch": 0.38,
"learning_rate": 4.9712774477166204e-05,
"loss": 3.2643,
"step": 1780
},
{
"epoch": 0.39,
"learning_rate": 4.970746599611243e-05,
"loss": 3.3422,
"step": 1790
},
{
"epoch": 0.39,
"learning_rate": 4.9702109196381585e-05,
"loss": 3.2945,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 4.9696704088449605e-05,
"loss": 3.2676,
"step": 1810
},
{
"epoch": 0.39,
"learning_rate": 4.9691250682886866e-05,
"loss": 3.2506,
"step": 1820
},
{
"epoch": 0.4,
"learning_rate": 4.968574899035822e-05,
"loss": 3.2974,
"step": 1830
},
{
"epoch": 0.4,
"learning_rate": 4.968019902162295e-05,
"loss": 3.247,
"step": 1840
},
{
"epoch": 0.4,
"learning_rate": 4.967460078753475e-05,
"loss": 3.2732,
"step": 1850
},
{
"epoch": 0.4,
"learning_rate": 4.9668954299041685e-05,
"loss": 3.2766,
"step": 1860
},
{
"epoch": 0.4,
"learning_rate": 4.96632595671862e-05,
"loss": 3.3118,
"step": 1870
},
{
"epoch": 0.41,
"learning_rate": 4.9657516603105104e-05,
"loss": 3.2738,
"step": 1880
},
{
"epoch": 0.41,
"learning_rate": 4.96517254180295e-05,
"loss": 3.2563,
"step": 1890
},
{
"epoch": 0.41,
"learning_rate": 4.964588602328481e-05,
"loss": 3.3043,
"step": 1900
},
{
"epoch": 0.41,
"learning_rate": 4.963999843029073e-05,
"loss": 3.2722,
"step": 1910
},
{
"epoch": 0.41,
"learning_rate": 4.9634062650561225e-05,
"loss": 3.2564,
"step": 1920
},
{
"epoch": 0.42,
"learning_rate": 4.962807869570448e-05,
"loss": 3.2751,
"step": 1930
},
{
"epoch": 0.42,
"learning_rate": 4.9622046577422895e-05,
"loss": 3.3186,
"step": 1940
},
{
"epoch": 0.42,
"learning_rate": 4.961596630751308e-05,
"loss": 3.2683,
"step": 1950
},
{
"epoch": 0.42,
"learning_rate": 4.9609837897865795e-05,
"loss": 3.2541,
"step": 1960
},
{
"epoch": 0.43,
"learning_rate": 4.9603661360465944e-05,
"loss": 3.2493,
"step": 1970
},
{
"epoch": 0.43,
"learning_rate": 4.9597436707392545e-05,
"loss": 3.3009,
"step": 1980
},
{
"epoch": 0.43,
"learning_rate": 4.959116395081872e-05,
"loss": 3.2042,
"step": 1990
},
{
"epoch": 0.43,
"learning_rate": 4.9584843103011666e-05,
"loss": 3.238,
"step": 2000
},
{
"epoch": 0.43,
"learning_rate": 4.957847417633264e-05,
"loss": 3.302,
"step": 2010
},
{
"epoch": 0.44,
"learning_rate": 4.9572057183236884e-05,
"loss": 3.307,
"step": 2020
},
{
"epoch": 0.44,
"learning_rate": 4.95655921362737e-05,
"loss": 3.2456,
"step": 2030
},
{
"epoch": 0.44,
"learning_rate": 4.95590790480863e-05,
"loss": 3.2762,
"step": 2040
},
{
"epoch": 0.44,
"learning_rate": 4.955251793141189e-05,
"loss": 3.2395,
"step": 2050
},
{
"epoch": 0.44,
"learning_rate": 4.95459087990816e-05,
"loss": 3.2899,
"step": 2060
},
{
"epoch": 0.45,
"learning_rate": 4.9539251664020435e-05,
"loss": 3.2941,
"step": 2070
},
{
"epoch": 0.45,
"learning_rate": 4.953254653924732e-05,
"loss": 3.2739,
"step": 2080
},
{
"epoch": 0.45,
"learning_rate": 4.952579343787496e-05,
"loss": 3.2443,
"step": 2090
},
{
"epoch": 0.45,
"learning_rate": 4.951899237310996e-05,
"loss": 3.2518,
"step": 2100
},
{
"epoch": 0.46,
"learning_rate": 4.951214335825268e-05,
"loss": 3.2615,
"step": 2110
},
{
"epoch": 0.46,
"learning_rate": 4.950524640669727e-05,
"loss": 3.2618,
"step": 2120
},
{
"epoch": 0.46,
"learning_rate": 4.949830153193161e-05,
"loss": 3.2335,
"step": 2130
},
{
"epoch": 0.46,
"learning_rate": 4.949130874753731e-05,
"loss": 3.2111,
"step": 2140
},
{
"epoch": 0.46,
"learning_rate": 4.9484268067189685e-05,
"loss": 3.2711,
"step": 2150
},
{
"epoch": 0.47,
"learning_rate": 4.947717950465769e-05,
"loss": 3.2634,
"step": 2160
},
{
"epoch": 0.47,
"learning_rate": 4.947004307380394e-05,
"loss": 3.3269,
"step": 2170
},
{
"epoch": 0.47,
"learning_rate": 4.946285878858467e-05,
"loss": 3.2958,
"step": 2180
},
{
"epoch": 0.47,
"learning_rate": 4.945562666304967e-05,
"loss": 3.2561,
"step": 2190
},
{
"epoch": 0.47,
"learning_rate": 4.9448346711342306e-05,
"loss": 3.2538,
"step": 2200
},
{
"epoch": 0.48,
"learning_rate": 4.94410189476995e-05,
"loss": 3.2647,
"step": 2210
},
{
"epoch": 0.48,
"learning_rate": 4.9433643386451625e-05,
"loss": 3.2753,
"step": 2220
},
{
"epoch": 0.48,
"learning_rate": 4.942622004202255e-05,
"loss": 3.2721,
"step": 2230
},
{
"epoch": 0.48,
"learning_rate": 4.941874892892963e-05,
"loss": 3.2566,
"step": 2240
},
{
"epoch": 0.49,
"learning_rate": 4.941123006178357e-05,
"loss": 3.3152,
"step": 2250
},
{
"epoch": 0.49,
"learning_rate": 4.94036634552885e-05,
"loss": 3.2924,
"step": 2260
},
{
"epoch": 0.49,
"learning_rate": 4.939604912424192e-05,
"loss": 3.3046,
"step": 2270
},
{
"epoch": 0.49,
"learning_rate": 4.938838708353464e-05,
"loss": 3.2171,
"step": 2280
},
{
"epoch": 0.49,
"learning_rate": 4.9380677348150786e-05,
"loss": 3.3019,
"step": 2290
},
{
"epoch": 0.5,
"learning_rate": 4.937291993316775e-05,
"loss": 3.2984,
"step": 2300
},
{
"epoch": 0.5,
"learning_rate": 4.9365114853756164e-05,
"loss": 3.3227,
"step": 2310
},
{
"epoch": 0.5,
"learning_rate": 4.93572621251799e-05,
"loss": 3.2673,
"step": 2320
},
{
"epoch": 0.5,
"learning_rate": 4.934936176279598e-05,
"loss": 3.2657,
"step": 2330
},
{
"epoch": 0.51,
"learning_rate": 4.9341413782054594e-05,
"loss": 3.1637,
"step": 2340
},
{
"epoch": 0.51,
"learning_rate": 4.933341819849906e-05,
"loss": 3.274,
"step": 2350
},
{
"epoch": 0.51,
"learning_rate": 4.9325375027765786e-05,
"loss": 3.2957,
"step": 2360
},
{
"epoch": 0.51,
"learning_rate": 4.9317284285584245e-05,
"loss": 3.2735,
"step": 2370
},
{
"epoch": 0.51,
"learning_rate": 4.9309145987776926e-05,
"loss": 3.2637,
"step": 2380
},
{
"epoch": 0.52,
"learning_rate": 4.9300960150259354e-05,
"loss": 3.3366,
"step": 2390
},
{
"epoch": 0.52,
"learning_rate": 4.929272678903999e-05,
"loss": 3.2555,
"step": 2400
},
{
"epoch": 0.52,
"learning_rate": 4.9284445920220255e-05,
"loss": 3.2773,
"step": 2410
},
{
"epoch": 0.52,
"learning_rate": 4.927611755999446e-05,
"loss": 3.228,
"step": 2420
},
{
"epoch": 0.52,
"learning_rate": 4.926774172464982e-05,
"loss": 3.3036,
"step": 2430
},
{
"epoch": 0.53,
"learning_rate": 4.925931843056635e-05,
"loss": 3.2746,
"step": 2440
},
{
"epoch": 0.53,
"learning_rate": 4.9250847694216916e-05,
"loss": 3.2754,
"step": 2450
},
{
"epoch": 0.53,
"learning_rate": 4.924232953216715e-05,
"loss": 3.2619,
"step": 2460
},
{
"epoch": 0.53,
"learning_rate": 4.923376396107543e-05,
"loss": 3.2097,
"step": 2470
},
{
"epoch": 0.54,
"learning_rate": 4.922515099769285e-05,
"loss": 3.301,
"step": 2480
},
{
"epoch": 0.54,
"learning_rate": 4.921649065886318e-05,
"loss": 3.251,
"step": 2490
},
{
"epoch": 0.54,
"learning_rate": 4.9207782961522844e-05,
"loss": 3.282,
"step": 2500
},
{
"epoch": 0.54,
"learning_rate": 4.919902792270088e-05,
"loss": 3.293,
"step": 2510
},
{
"epoch": 0.54,
"learning_rate": 4.919022555951892e-05,
"loss": 3.2864,
"step": 2520
},
{
"epoch": 0.55,
"learning_rate": 4.918137588919113e-05,
"loss": 3.2326,
"step": 2530
},
{
"epoch": 0.55,
"learning_rate": 4.917247892902419e-05,
"loss": 3.299,
"step": 2540
},
{
"epoch": 0.55,
"learning_rate": 4.916353469641727e-05,
"loss": 3.242,
"step": 2550
},
{
"epoch": 0.55,
"learning_rate": 4.915454320886199e-05,
"loss": 3.2348,
"step": 2560
},
{
"epoch": 0.55,
"learning_rate": 4.9145504483942384e-05,
"loss": 3.2232,
"step": 2570
},
{
"epoch": 0.56,
"learning_rate": 4.913641853933484e-05,
"loss": 3.256,
"step": 2580
},
{
"epoch": 0.56,
"learning_rate": 4.912728539280813e-05,
"loss": 3.1991,
"step": 2590
},
{
"epoch": 0.56,
"learning_rate": 4.9118105062223305e-05,
"loss": 3.2452,
"step": 2600
},
{
"epoch": 0.56,
"learning_rate": 4.9108877565533694e-05,
"loss": 3.3276,
"step": 2610
},
{
"epoch": 0.57,
"learning_rate": 4.9099602920784884e-05,
"loss": 3.2496,
"step": 2620
},
{
"epoch": 0.57,
"learning_rate": 4.9090281146114655e-05,
"loss": 3.2857,
"step": 2630
},
{
"epoch": 0.57,
"learning_rate": 4.9080912259752946e-05,
"loss": 3.2562,
"step": 2640
},
{
"epoch": 0.57,
"learning_rate": 4.907149628002184e-05,
"loss": 3.2469,
"step": 2650
},
{
"epoch": 0.57,
"learning_rate": 4.906203322533552e-05,
"loss": 3.2338,
"step": 2660
},
{
"epoch": 0.58,
"learning_rate": 4.905252311420022e-05,
"loss": 3.2455,
"step": 2670
},
{
"epoch": 0.58,
"learning_rate": 4.9042965965214205e-05,
"loss": 3.2221,
"step": 2680
},
{
"epoch": 0.58,
"learning_rate": 4.903336179706773e-05,
"loss": 3.2349,
"step": 2690
},
{
"epoch": 0.58,
"learning_rate": 4.9023710628543004e-05,
"loss": 3.2576,
"step": 2700
},
{
"epoch": 0.59,
"learning_rate": 4.901401247851415e-05,
"loss": 3.2727,
"step": 2710
},
{
"epoch": 0.59,
"learning_rate": 4.900426736594716e-05,
"loss": 3.2918,
"step": 2720
},
{
"epoch": 0.59,
"learning_rate": 4.899447530989987e-05,
"loss": 3.2933,
"step": 2730
},
{
"epoch": 0.59,
"learning_rate": 4.8984636329521936e-05,
"loss": 3.2786,
"step": 2740
},
{
"epoch": 0.59,
"learning_rate": 4.897475044405476e-05,
"loss": 3.276,
"step": 2750
},
{
"epoch": 0.6,
"learning_rate": 4.896481767283149e-05,
"loss": 3.2557,
"step": 2760
},
{
"epoch": 0.6,
"learning_rate": 4.895483803527695e-05,
"loss": 3.2517,
"step": 2770
},
{
"epoch": 0.6,
"learning_rate": 4.894481155090763e-05,
"loss": 3.2673,
"step": 2780
},
{
"epoch": 0.6,
"learning_rate": 4.8934738239331634e-05,
"loss": 3.2854,
"step": 2790
},
{
"epoch": 0.6,
"learning_rate": 4.892461812024863e-05,
"loss": 3.2277,
"step": 2800
},
{
"epoch": 0.61,
"learning_rate": 4.891445121344983e-05,
"loss": 3.2537,
"step": 2810
},
{
"epoch": 0.61,
"learning_rate": 4.8904237538817966e-05,
"loss": 3.1999,
"step": 2820
},
{
"epoch": 0.61,
"learning_rate": 4.8893977116327195e-05,
"loss": 3.2162,
"step": 2830
},
{
"epoch": 0.61,
"learning_rate": 4.888366996604312e-05,
"loss": 3.2757,
"step": 2840
},
{
"epoch": 0.62,
"learning_rate": 4.8873316108122714e-05,
"loss": 3.2181,
"step": 2850
},
{
"epoch": 0.62,
"learning_rate": 4.886291556281431e-05,
"loss": 3.2265,
"step": 2860
},
{
"epoch": 0.62,
"learning_rate": 4.885246835045752e-05,
"loss": 3.2064,
"step": 2870
},
{
"epoch": 0.62,
"learning_rate": 4.8841974491483244e-05,
"loss": 3.2932,
"step": 2880
},
{
"epoch": 0.62,
"learning_rate": 4.883143400641359e-05,
"loss": 3.2837,
"step": 2890
},
{
"epoch": 0.63,
"learning_rate": 4.882084691586185e-05,
"loss": 3.2177,
"step": 2900
},
{
"epoch": 0.63,
"learning_rate": 4.881021324053247e-05,
"loss": 3.2502,
"step": 2910
},
{
"epoch": 0.63,
"learning_rate": 4.8799533001220984e-05,
"loss": 3.2455,
"step": 2920
},
{
"epoch": 0.63,
"learning_rate": 4.8788806218814e-05,
"loss": 3.2652,
"step": 2930
},
{
"epoch": 0.63,
"learning_rate": 4.877803291428913e-05,
"loss": 3.2276,
"step": 2940
},
{
"epoch": 0.64,
"learning_rate": 4.876721310871499e-05,
"loss": 3.2539,
"step": 2950
},
{
"epoch": 0.64,
"learning_rate": 4.875634682325113e-05,
"loss": 3.2329,
"step": 2960
},
{
"epoch": 0.64,
"learning_rate": 4.874543407914797e-05,
"loss": 3.2349,
"step": 2970
},
{
"epoch": 0.64,
"learning_rate": 4.873447489774682e-05,
"loss": 3.2197,
"step": 2980
},
{
"epoch": 0.65,
"learning_rate": 4.8723469300479785e-05,
"loss": 3.2482,
"step": 2990
},
{
"epoch": 0.65,
"learning_rate": 4.871241730886976e-05,
"loss": 3.2096,
"step": 3000
},
{
"epoch": 0.65,
"learning_rate": 4.8701318944530346e-05,
"loss": 3.246,
"step": 3010
},
{
"epoch": 0.65,
"learning_rate": 4.869017422916584e-05,
"loss": 3.2549,
"step": 3020
},
{
"epoch": 0.65,
"learning_rate": 4.867898318457121e-05,
"loss": 3.2463,
"step": 3030
},
{
"epoch": 0.66,
"learning_rate": 4.866774583263198e-05,
"loss": 3.2694,
"step": 3040
},
{
"epoch": 0.66,
"learning_rate": 4.865646219532428e-05,
"loss": 3.2059,
"step": 3050
},
{
"epoch": 0.66,
"learning_rate": 4.8645132294714726e-05,
"loss": 3.2356,
"step": 3060
},
{
"epoch": 0.66,
"learning_rate": 4.8633756152960416e-05,
"loss": 3.2536,
"step": 3070
},
{
"epoch": 0.66,
"learning_rate": 4.862233379230889e-05,
"loss": 3.2186,
"step": 3080
},
{
"epoch": 0.67,
"learning_rate": 4.861086523509806e-05,
"loss": 3.2559,
"step": 3090
},
{
"epoch": 0.67,
"learning_rate": 4.8599350503756194e-05,
"loss": 3.2693,
"step": 3100
},
{
"epoch": 0.67,
"learning_rate": 4.858778962080184e-05,
"loss": 3.2265,
"step": 3110
},
{
"epoch": 0.67,
"learning_rate": 4.857618260884383e-05,
"loss": 3.1982,
"step": 3120
},
{
"epoch": 0.68,
"learning_rate": 4.856452949058118e-05,
"loss": 3.2221,
"step": 3130
},
{
"epoch": 0.68,
"learning_rate": 4.8552830288803096e-05,
"loss": 3.2693,
"step": 3140
},
{
"epoch": 0.68,
"learning_rate": 4.8541085026388885e-05,
"loss": 3.2149,
"step": 3150
},
{
"epoch": 0.68,
"learning_rate": 4.8529293726307954e-05,
"loss": 3.232,
"step": 3160
},
{
"epoch": 0.68,
"learning_rate": 4.8517456411619724e-05,
"loss": 3.2457,
"step": 3170
},
{
"epoch": 0.69,
"learning_rate": 4.85055731054736e-05,
"loss": 3.2641,
"step": 3180
},
{
"epoch": 0.69,
"learning_rate": 4.849364383110895e-05,
"loss": 3.2265,
"step": 3190
},
{
"epoch": 0.69,
"learning_rate": 4.848166861185503e-05,
"loss": 3.19,
"step": 3200
},
{
"epoch": 0.69,
"learning_rate": 4.846964747113094e-05,
"loss": 3.2042,
"step": 3210
},
{
"epoch": 0.7,
"learning_rate": 4.845758043244559e-05,
"loss": 3.1866,
"step": 3220
},
{
"epoch": 0.7,
"learning_rate": 4.844546751939766e-05,
"loss": 3.2578,
"step": 3230
},
{
"epoch": 0.7,
"learning_rate": 4.843330875567553e-05,
"loss": 3.2358,
"step": 3240
},
{
"epoch": 0.7,
"learning_rate": 4.842110416505724e-05,
"loss": 3.2441,
"step": 3250
},
{
"epoch": 0.7,
"learning_rate": 4.840885377141047e-05,
"loss": 3.2166,
"step": 3260
},
{
"epoch": 0.71,
"learning_rate": 4.839655759869247e-05,
"loss": 3.219,
"step": 3270
},
{
"epoch": 0.71,
"learning_rate": 4.8384215670949994e-05,
"loss": 3.2623,
"step": 3280
},
{
"epoch": 0.71,
"learning_rate": 4.8371828012319315e-05,
"loss": 3.2096,
"step": 3290
},
{
"epoch": 0.71,
"learning_rate": 4.83593946470261e-05,
"loss": 3.2061,
"step": 3300
},
{
"epoch": 0.71,
"learning_rate": 4.8346915599385434e-05,
"loss": 3.2489,
"step": 3310
},
{
"epoch": 0.72,
"learning_rate": 4.833439089380172e-05,
"loss": 3.1859,
"step": 3320
},
{
"epoch": 0.72,
"learning_rate": 4.832182055476865e-05,
"loss": 3.2674,
"step": 3330
},
{
"epoch": 0.72,
"learning_rate": 4.830920460686916e-05,
"loss": 3.2582,
"step": 3340
},
{
"epoch": 0.72,
"learning_rate": 4.82965430747754e-05,
"loss": 3.2694,
"step": 3350
},
{
"epoch": 0.73,
"learning_rate": 4.8283835983248635e-05,
"loss": 3.201,
"step": 3360
},
{
"epoch": 0.73,
"learning_rate": 4.827108335713926e-05,
"loss": 3.2592,
"step": 3370
},
{
"epoch": 0.73,
"learning_rate": 4.825828522138668e-05,
"loss": 3.2483,
"step": 3380
},
{
"epoch": 0.73,
"learning_rate": 4.824544160101933e-05,
"loss": 3.2182,
"step": 3390
},
{
"epoch": 0.73,
"learning_rate": 4.82325525211546e-05,
"loss": 3.228,
"step": 3400
},
{
"epoch": 0.74,
"learning_rate": 4.821961800699874e-05,
"loss": 3.2215,
"step": 3410
},
{
"epoch": 0.74,
"learning_rate": 4.820663808384691e-05,
"loss": 3.1826,
"step": 3420
},
{
"epoch": 0.74,
"learning_rate": 4.8193612777083034e-05,
"loss": 3.2198,
"step": 3430
},
{
"epoch": 0.74,
"learning_rate": 4.8180542112179795e-05,
"loss": 3.2331,
"step": 3440
},
{
"epoch": 0.74,
"learning_rate": 4.8167426114698595e-05,
"loss": 3.2887,
"step": 3450
},
{
"epoch": 0.75,
"learning_rate": 4.8154264810289473e-05,
"loss": 3.1379,
"step": 3460
},
{
"epoch": 0.75,
"learning_rate": 4.814105822469109e-05,
"loss": 3.2432,
"step": 3470
},
{
"epoch": 0.75,
"learning_rate": 4.812780638373064e-05,
"loss": 3.2247,
"step": 3480
},
{
"epoch": 0.75,
"learning_rate": 4.811450931332383e-05,
"loss": 3.2408,
"step": 3490
},
{
"epoch": 0.76,
"learning_rate": 4.810116703947482e-05,
"loss": 3.2435,
"step": 3500
},
{
"epoch": 0.76,
"learning_rate": 4.8087779588276166e-05,
"loss": 3.2466,
"step": 3510
},
{
"epoch": 0.76,
"learning_rate": 4.8074346985908784e-05,
"loss": 3.1736,
"step": 3520
},
{
"epoch": 0.76,
"learning_rate": 4.806086925864187e-05,
"loss": 3.2274,
"step": 3530
},
{
"epoch": 0.76,
"learning_rate": 4.80473464328329e-05,
"loss": 3.2527,
"step": 3540
},
{
"epoch": 0.77,
"learning_rate": 4.803377853492751e-05,
"loss": 3.1531,
"step": 3550
},
{
"epoch": 0.77,
"learning_rate": 4.80201655914595e-05,
"loss": 3.2354,
"step": 3560
},
{
"epoch": 0.77,
"learning_rate": 4.800650762905076e-05,
"loss": 3.1245,
"step": 3570
},
{
"epoch": 0.77,
"learning_rate": 4.799280467441122e-05,
"loss": 3.2432,
"step": 3580
},
{
"epoch": 0.78,
"learning_rate": 4.79790567543388e-05,
"loss": 3.2156,
"step": 3590
},
{
"epoch": 0.78,
"learning_rate": 4.7965263895719356e-05,
"loss": 3.2319,
"step": 3600
},
{
"epoch": 0.78,
"learning_rate": 4.795142612552661e-05,
"loss": 3.2462,
"step": 3610
},
{
"epoch": 0.78,
"learning_rate": 4.793754347082214e-05,
"loss": 3.1948,
"step": 3620
},
{
"epoch": 0.78,
"learning_rate": 4.7923615958755296e-05,
"loss": 3.181,
"step": 3630
},
{
"epoch": 0.79,
"learning_rate": 4.790964361656314e-05,
"loss": 3.2148,
"step": 3640
},
{
"epoch": 0.79,
"learning_rate": 4.789562647157041e-05,
"loss": 3.2806,
"step": 3650
},
{
"epoch": 0.79,
"learning_rate": 4.7881564551189466e-05,
"loss": 3.2501,
"step": 3660
},
{
"epoch": 0.79,
"learning_rate": 4.786745788292023e-05,
"loss": 3.2016,
"step": 3670
},
{
"epoch": 0.79,
"learning_rate": 4.785330649435014e-05,
"loss": 3.2697,
"step": 3680
},
{
"epoch": 0.8,
"learning_rate": 4.783911041315408e-05,
"loss": 3.2199,
"step": 3690
},
{
"epoch": 0.8,
"learning_rate": 4.782486966709434e-05,
"loss": 3.233,
"step": 3700
},
{
"epoch": 0.8,
"learning_rate": 4.781058428402055e-05,
"loss": 3.2674,
"step": 3710
},
{
"epoch": 0.8,
"learning_rate": 4.779625429186967e-05,
"loss": 3.2107,
"step": 3720
},
{
"epoch": 0.81,
"learning_rate": 4.778187971866584e-05,
"loss": 3.2161,
"step": 3730
},
{
"epoch": 0.81,
"learning_rate": 4.776746059252044e-05,
"loss": 3.1863,
"step": 3740
},
{
"epoch": 0.81,
"learning_rate": 4.775299694163194e-05,
"loss": 3.3145,
"step": 3750
},
{
"epoch": 0.81,
"learning_rate": 4.7738488794285906e-05,
"loss": 3.2307,
"step": 3760
},
{
"epoch": 0.81,
"learning_rate": 4.7723936178854914e-05,
"loss": 3.2021,
"step": 3770
},
{
"epoch": 0.82,
"learning_rate": 4.7709339123798494e-05,
"loss": 3.197,
"step": 3780
},
{
"epoch": 0.82,
"learning_rate": 4.769469765766311e-05,
"loss": 3.2191,
"step": 3790
},
{
"epoch": 0.82,
"learning_rate": 4.768001180908205e-05,
"loss": 3.2242,
"step": 3800
},
{
"epoch": 0.82,
"learning_rate": 4.766528160677541e-05,
"loss": 3.224,
"step": 3810
},
{
"epoch": 0.82,
"learning_rate": 4.7650507079550034e-05,
"loss": 3.2423,
"step": 3820
},
{
"epoch": 0.83,
"learning_rate": 4.763568825629943e-05,
"loss": 3.2121,
"step": 3830
},
{
"epoch": 0.83,
"learning_rate": 4.762082516600375e-05,
"loss": 3.204,
"step": 3840
},
{
"epoch": 0.83,
"learning_rate": 4.7605917837729704e-05,
"loss": 3.1812,
"step": 3850
},
{
"epoch": 0.83,
"learning_rate": 4.759096630063052e-05,
"loss": 3.2127,
"step": 3860
},
{
"epoch": 0.84,
"learning_rate": 4.757597058394588e-05,
"loss": 3.2645,
"step": 3870
},
{
"epoch": 0.84,
"learning_rate": 4.756093071700187e-05,
"loss": 3.2243,
"step": 3880
},
{
"epoch": 0.84,
"learning_rate": 4.754584672921091e-05,
"loss": 3.1511,
"step": 3890
},
{
"epoch": 0.84,
"learning_rate": 4.753071865007172e-05,
"loss": 3.1984,
"step": 3900
},
{
"epoch": 0.84,
"learning_rate": 4.751554650916922e-05,
"loss": 3.1949,
"step": 3910
},
{
"epoch": 0.85,
"learning_rate": 4.7500330336174514e-05,
"loss": 3.1599,
"step": 3920
},
{
"epoch": 0.85,
"learning_rate": 4.7485070160844836e-05,
"loss": 3.19,
"step": 3930
},
{
"epoch": 0.85,
"learning_rate": 4.746976601302343e-05,
"loss": 3.1638,
"step": 3940
},
{
"epoch": 0.85,
"learning_rate": 4.745441792263956e-05,
"loss": 3.1596,
"step": 3950
},
{
"epoch": 0.85,
"learning_rate": 4.743902591970843e-05,
"loss": 3.2311,
"step": 3960
},
{
"epoch": 0.86,
"learning_rate": 4.74235900343311e-05,
"loss": 3.2572,
"step": 3970
},
{
"epoch": 0.86,
"learning_rate": 4.740811029669448e-05,
"loss": 3.268,
"step": 3980
},
{
"epoch": 0.86,
"learning_rate": 4.73925867370712e-05,
"loss": 3.1662,
"step": 3990
},
{
"epoch": 0.86,
"learning_rate": 4.737701938581962e-05,
"loss": 3.2194,
"step": 4000
},
{
"epoch": 0.87,
"learning_rate": 4.736140827338372e-05,
"loss": 3.2122,
"step": 4010
},
{
"epoch": 0.87,
"learning_rate": 4.734575343029307e-05,
"loss": 3.2267,
"step": 4020
},
{
"epoch": 0.87,
"learning_rate": 4.733005488716277e-05,
"loss": 3.1749,
"step": 4030
},
{
"epoch": 0.87,
"learning_rate": 4.731431267469336e-05,
"loss": 3.2067,
"step": 4040
},
{
"epoch": 0.87,
"learning_rate": 4.729852682367081e-05,
"loss": 3.2288,
"step": 4050
},
{
"epoch": 0.88,
"learning_rate": 4.728269736496639e-05,
"loss": 3.2051,
"step": 4060
},
{
"epoch": 0.88,
"learning_rate": 4.726682432953668e-05,
"loss": 3.2347,
"step": 4070
},
{
"epoch": 0.88,
"learning_rate": 4.7250907748423504e-05,
"loss": 3.2455,
"step": 4080
},
{
"epoch": 0.88,
"learning_rate": 4.723494765275378e-05,
"loss": 3.2045,
"step": 4090
},
{
"epoch": 0.89,
"learning_rate": 4.721894407373956e-05,
"loss": 3.2157,
"step": 4100
},
{
"epoch": 0.89,
"learning_rate": 4.720289704267796e-05,
"loss": 3.1917,
"step": 4110
},
{
"epoch": 0.89,
"learning_rate": 4.7186806590951025e-05,
"loss": 3.1743,
"step": 4120
},
{
"epoch": 0.89,
"learning_rate": 4.7170672750025736e-05,
"loss": 3.2385,
"step": 4130
},
{
"epoch": 0.89,
"learning_rate": 4.715449555145394e-05,
"loss": 3.1944,
"step": 4140
},
{
"epoch": 0.9,
"learning_rate": 4.713827502687224e-05,
"loss": 3.245,
"step": 4150
},
{
"epoch": 0.9,
"learning_rate": 4.712201120800201e-05,
"loss": 3.2108,
"step": 4160
},
{
"epoch": 0.9,
"learning_rate": 4.7105704126649264e-05,
"loss": 3.2386,
"step": 4170
},
{
"epoch": 0.9,
"learning_rate": 4.708935381470463e-05,
"loss": 3.1985,
"step": 4180
},
{
"epoch": 0.9,
"learning_rate": 4.707296030414328e-05,
"loss": 3.2304,
"step": 4190
},
{
"epoch": 0.91,
"learning_rate": 4.705652362702486e-05,
"loss": 3.2157,
"step": 4200
},
{
"epoch": 0.91,
"learning_rate": 4.704004381549345e-05,
"loss": 3.2249,
"step": 4210
},
{
"epoch": 0.91,
"learning_rate": 4.7023520901777475e-05,
"loss": 3.2204,
"step": 4220
},
{
"epoch": 0.91,
"learning_rate": 4.700695491818963e-05,
"loss": 3.1909,
"step": 4230
},
{
"epoch": 0.92,
"learning_rate": 4.699034589712688e-05,
"loss": 3.2335,
"step": 4240
},
{
"epoch": 0.92,
"learning_rate": 4.697369387107033e-05,
"loss": 3.185,
"step": 4250
},
{
"epoch": 0.92,
"learning_rate": 4.695699887258519e-05,
"loss": 3.2263,
"step": 4260
},
{
"epoch": 0.92,
"learning_rate": 4.694026093432071e-05,
"loss": 3.1998,
"step": 4270
},
{
"epoch": 0.92,
"learning_rate": 4.692348008901011e-05,
"loss": 3.2261,
"step": 4280
},
{
"epoch": 0.93,
"learning_rate": 4.6906656369470536e-05,
"loss": 3.2364,
"step": 4290
},
{
"epoch": 0.93,
"learning_rate": 4.688978980860297e-05,
"loss": 3.2066,
"step": 4300
},
{
"epoch": 0.93,
"learning_rate": 4.6872880439392156e-05,
"loss": 3.1554,
"step": 4310
},
{
"epoch": 0.93,
"learning_rate": 4.685592829490659e-05,
"loss": 3.2672,
"step": 4320
},
{
"epoch": 0.93,
"learning_rate": 4.6838933408298415e-05,
"loss": 3.2105,
"step": 4330
},
{
"epoch": 0.94,
"learning_rate": 4.6821895812803315e-05,
"loss": 3.2472,
"step": 4340
},
{
"epoch": 0.94,
"learning_rate": 4.680481554174058e-05,
"loss": 3.2362,
"step": 4350
},
{
"epoch": 0.94,
"learning_rate": 4.678769262851288e-05,
"loss": 3.1689,
"step": 4360
},
{
"epoch": 0.94,
"learning_rate": 4.677052710660631e-05,
"loss": 3.1824,
"step": 4370
},
{
"epoch": 0.95,
"learning_rate": 4.67533190095903e-05,
"loss": 3.2347,
"step": 4380
},
{
"epoch": 0.95,
"learning_rate": 4.6736068371117525e-05,
"loss": 3.2139,
"step": 4390
},
{
"epoch": 0.95,
"learning_rate": 4.671877522492387e-05,
"loss": 3.2325,
"step": 4400
},
{
"epoch": 0.95,
"learning_rate": 4.6701439604828335e-05,
"loss": 3.175,
"step": 4410
},
{
"epoch": 0.95,
"learning_rate": 4.6684061544732996e-05,
"loss": 3.1917,
"step": 4420
},
{
"epoch": 0.96,
"learning_rate": 4.66666410786229e-05,
"loss": 3.1885,
"step": 4430
},
{
"epoch": 0.96,
"learning_rate": 4.664917824056607e-05,
"loss": 3.23,
"step": 4440
},
{
"epoch": 0.96,
"learning_rate": 4.6631673064713347e-05,
"loss": 3.1515,
"step": 4450
},
{
"epoch": 0.96,
"learning_rate": 4.6614125585298404e-05,
"loss": 3.1813,
"step": 4460
},
{
"epoch": 0.97,
"learning_rate": 4.659653583663762e-05,
"loss": 3.2451,
"step": 4470
},
{
"epoch": 0.97,
"learning_rate": 4.657890385313004e-05,
"loss": 3.1761,
"step": 4480
},
{
"epoch": 0.97,
"learning_rate": 4.656122966925733e-05,
"loss": 3.1553,
"step": 4490
},
{
"epoch": 0.97,
"learning_rate": 4.654351331958365e-05,
"loss": 3.1395,
"step": 4500
},
{
"epoch": 0.97,
"learning_rate": 4.6525754838755635e-05,
"loss": 3.1984,
"step": 4510
},
{
"epoch": 0.98,
"learning_rate": 4.6507954261502324e-05,
"loss": 3.1204,
"step": 4520
},
{
"epoch": 0.98,
"learning_rate": 4.649011162263506e-05,
"loss": 3.1619,
"step": 4530
},
{
"epoch": 0.98,
"learning_rate": 4.6472226957047473e-05,
"loss": 3.2132,
"step": 4540
},
{
"epoch": 0.98,
"learning_rate": 4.645430029971534e-05,
"loss": 3.1963,
"step": 4550
},
{
"epoch": 0.98,
"learning_rate": 4.643633168569659e-05,
"loss": 3.205,
"step": 4560
},
{
"epoch": 0.99,
"learning_rate": 4.641832115013119e-05,
"loss": 3.2537,
"step": 4570
},
{
"epoch": 0.99,
"learning_rate": 4.6400268728241106e-05,
"loss": 3.1919,
"step": 4580
},
{
"epoch": 0.99,
"learning_rate": 4.63821744553302e-05,
"loss": 3.1815,
"step": 4590
},
{
"epoch": 0.99,
"learning_rate": 4.636403836678419e-05,
"loss": 3.2031,
"step": 4600
},
{
"epoch": 1.0,
"learning_rate": 4.634586049807056e-05,
"loss": 3.2011,
"step": 4610
},
{
"epoch": 1.0,
"learning_rate": 4.6327640884738507e-05,
"loss": 3.1694,
"step": 4620
},
{
"epoch": 1.0,
"learning_rate": 4.630937956241887e-05,
"loss": 3.2151,
"step": 4630
},
{
"epoch": 1.0,
"eval_loss": 3.193521499633789,
"eval_runtime": 191.4113,
"eval_samples_per_second": 774.353,
"eval_steps_per_second": 24.199,
"step": 4632
},
{
"epoch": 1.0,
"learning_rate": 4.629107656682405e-05,
"loss": 3.2192,
"step": 4640
},
{
"epoch": 1.0,
"learning_rate": 4.6272731933747945e-05,
"loss": 3.1595,
"step": 4650
},
{
"epoch": 1.01,
"learning_rate": 4.625434569906587e-05,
"loss": 3.1397,
"step": 4660
},
{
"epoch": 1.01,
"learning_rate": 4.6235917898734526e-05,
"loss": 3.1916,
"step": 4670
},
{
"epoch": 1.01,
"learning_rate": 4.6217448568791874e-05,
"loss": 3.1508,
"step": 4680
},
{
"epoch": 1.01,
"learning_rate": 4.619893774535711e-05,
"loss": 3.2277,
"step": 4690
},
{
"epoch": 1.01,
"learning_rate": 4.6180385464630546e-05,
"loss": 3.1812,
"step": 4700
},
{
"epoch": 1.02,
"learning_rate": 4.616179176289361e-05,
"loss": 3.1803,
"step": 4710
},
{
"epoch": 1.02,
"learning_rate": 4.614315667650872e-05,
"loss": 3.2129,
"step": 4720
},
{
"epoch": 1.02,
"learning_rate": 4.61244802419192e-05,
"loss": 3.1907,
"step": 4730
},
{
"epoch": 1.02,
"learning_rate": 4.610576249564926e-05,
"loss": 3.2011,
"step": 4740
},
{
"epoch": 1.03,
"learning_rate": 4.608700347430392e-05,
"loss": 3.1798,
"step": 4750
},
{
"epoch": 1.03,
"learning_rate": 4.606820321456887e-05,
"loss": 3.1975,
"step": 4760
},
{
"epoch": 1.03,
"learning_rate": 4.6049361753210496e-05,
"loss": 3.2008,
"step": 4770
},
{
"epoch": 1.03,
"learning_rate": 4.603047912707572e-05,
"loss": 3.2043,
"step": 4780
},
{
"epoch": 1.03,
"learning_rate": 4.6011555373091994e-05,
"loss": 3.2204,
"step": 4790
},
{
"epoch": 1.04,
"learning_rate": 4.5992590528267185e-05,
"loss": 3.2014,
"step": 4800
},
{
"epoch": 1.04,
"learning_rate": 4.5973584629689524e-05,
"loss": 3.2395,
"step": 4810
},
{
"epoch": 1.04,
"learning_rate": 4.5954537714527534e-05,
"loss": 3.2104,
"step": 4820
},
{
"epoch": 1.04,
"learning_rate": 4.593544982002994e-05,
"loss": 3.1612,
"step": 4830
},
{
"epoch": 1.04,
"learning_rate": 4.591632098352562e-05,
"loss": 3.1777,
"step": 4840
},
{
"epoch": 1.05,
"learning_rate": 4.5897151242423504e-05,
"loss": 3.2225,
"step": 4850
},
{
"epoch": 1.05,
"learning_rate": 4.5877940634212524e-05,
"loss": 3.2241,
"step": 4860
},
{
"epoch": 1.05,
"learning_rate": 4.5858689196461545e-05,
"loss": 3.1808,
"step": 4870
},
{
"epoch": 1.05,
"learning_rate": 4.583939696681926e-05,
"loss": 3.1117,
"step": 4880
},
{
"epoch": 1.06,
"learning_rate": 4.582006398301414e-05,
"loss": 3.1607,
"step": 4890
},
{
"epoch": 1.06,
"learning_rate": 4.580069028285437e-05,
"loss": 3.1559,
"step": 4900
},
{
"epoch": 1.06,
"learning_rate": 4.578127590422774e-05,
"loss": 3.1716,
"step": 4910
},
{
"epoch": 1.06,
"learning_rate": 4.576182088510161e-05,
"loss": 3.1874,
"step": 4920
},
{
"epoch": 1.06,
"learning_rate": 4.5742325263522825e-05,
"loss": 3.2111,
"step": 4930
},
{
"epoch": 1.07,
"learning_rate": 4.572278907761759e-05,
"loss": 3.2195,
"step": 4940
},
{
"epoch": 1.07,
"learning_rate": 4.570321236559149e-05,
"loss": 3.1611,
"step": 4950
},
{
"epoch": 1.07,
"learning_rate": 4.568359516572933e-05,
"loss": 3.1852,
"step": 4960
},
{
"epoch": 1.07,
"learning_rate": 4.566393751639512e-05,
"loss": 3.2029,
"step": 4970
},
{
"epoch": 1.08,
"learning_rate": 4.5644239456031953e-05,
"loss": 3.1492,
"step": 4980
},
{
"epoch": 1.08,
"learning_rate": 4.562450102316196e-05,
"loss": 3.1708,
"step": 4990
},
{
"epoch": 1.08,
"learning_rate": 4.560472225638622e-05,
"loss": 3.1848,
"step": 5000
},
{
"epoch": 1.08,
"learning_rate": 4.558490319438471e-05,
"loss": 3.1712,
"step": 5010
},
{
"epoch": 1.08,
"learning_rate": 4.556504387591618e-05,
"loss": 3.1685,
"step": 5020
},
{
"epoch": 1.09,
"learning_rate": 4.554514433981812e-05,
"loss": 3.1802,
"step": 5030
},
{
"epoch": 1.09,
"learning_rate": 4.552520462500668e-05,
"loss": 3.1475,
"step": 5040
},
{
"epoch": 1.09,
"learning_rate": 4.5505224770476576e-05,
"loss": 3.1959,
"step": 5050
},
{
"epoch": 1.09,
"learning_rate": 4.548520481530102e-05,
"loss": 3.176,
"step": 5060
},
{
"epoch": 1.09,
"learning_rate": 4.5465144798631655e-05,
"loss": 3.1767,
"step": 5070
},
{
"epoch": 1.1,
"learning_rate": 4.544504475969846e-05,
"loss": 3.1642,
"step": 5080
},
{
"epoch": 1.1,
"learning_rate": 4.542490473780968e-05,
"loss": 3.1974,
"step": 5090
},
{
"epoch": 1.1,
"learning_rate": 4.540472477235177e-05,
"loss": 3.1425,
"step": 5100
},
{
"epoch": 1.1,
"learning_rate": 4.538450490278928e-05,
"loss": 3.2191,
"step": 5110
},
{
"epoch": 1.11,
"learning_rate": 4.536424516866482e-05,
"loss": 3.1307,
"step": 5120
},
{
"epoch": 1.11,
"learning_rate": 4.534394560959893e-05,
"loss": 3.1451,
"step": 5130
},
{
"epoch": 1.11,
"learning_rate": 4.5323606265290045e-05,
"loss": 3.152,
"step": 5140
},
{
"epoch": 1.11,
"learning_rate": 4.5303227175514426e-05,
"loss": 3.2005,
"step": 5150
},
{
"epoch": 1.11,
"learning_rate": 4.528280838012605e-05,
"loss": 3.214,
"step": 5160
},
{
"epoch": 1.12,
"learning_rate": 4.5262349919056514e-05,
"loss": 3.1606,
"step": 5170
},
{
"epoch": 1.12,
"learning_rate": 4.524185183231503e-05,
"loss": 3.1831,
"step": 5180
},
{
"epoch": 1.12,
"learning_rate": 4.522131415998828e-05,
"loss": 3.2262,
"step": 5190
},
{
"epoch": 1.12,
"learning_rate": 4.520073694224036e-05,
"loss": 3.188,
"step": 5200
},
{
"epoch": 1.12,
"learning_rate": 4.518012021931271e-05,
"loss": 3.2036,
"step": 5210
},
{
"epoch": 1.13,
"learning_rate": 4.515946403152403e-05,
"loss": 3.196,
"step": 5220
},
{
"epoch": 1.13,
"learning_rate": 4.5138768419270184e-05,
"loss": 3.1898,
"step": 5230
},
{
"epoch": 1.13,
"learning_rate": 4.511803342302415e-05,
"loss": 3.1713,
"step": 5240
},
{
"epoch": 1.13,
"learning_rate": 4.509725908333592e-05,
"loss": 3.1812,
"step": 5250
},
{
"epoch": 1.14,
"learning_rate": 4.507644544083244e-05,
"loss": 3.1826,
"step": 5260
},
{
"epoch": 1.14,
"learning_rate": 4.505559253621748e-05,
"loss": 3.1169,
"step": 5270
},
{
"epoch": 1.14,
"learning_rate": 4.503470041027165e-05,
"loss": 3.1776,
"step": 5280
},
{
"epoch": 1.14,
"learning_rate": 4.5013769103852204e-05,
"loss": 3.1778,
"step": 5290
},
{
"epoch": 1.14,
"learning_rate": 4.499279865789307e-05,
"loss": 3.1548,
"step": 5300
},
{
"epoch": 1.15,
"learning_rate": 4.497178911340467e-05,
"loss": 3.1902,
"step": 5310
},
{
"epoch": 1.15,
"learning_rate": 4.495074051147393e-05,
"loss": 3.1998,
"step": 5320
},
{
"epoch": 1.15,
"learning_rate": 4.492965289326414e-05,
"loss": 3.2263,
"step": 5330
},
{
"epoch": 1.15,
"learning_rate": 4.4908526300014885e-05,
"loss": 3.174,
"step": 5340
},
{
"epoch": 1.16,
"learning_rate": 4.488736077304198e-05,
"loss": 3.2044,
"step": 5350
},
{
"epoch": 1.16,
"learning_rate": 4.486615635373738e-05,
"loss": 3.1525,
"step": 5360
},
{
"epoch": 1.16,
"learning_rate": 4.484491308356909e-05,
"loss": 3.1919,
"step": 5370
},
{
"epoch": 1.16,
"learning_rate": 4.482363100408111e-05,
"loss": 3.1455,
"step": 5380
},
{
"epoch": 1.16,
"learning_rate": 4.4802310156893314e-05,
"loss": 3.1776,
"step": 5390
},
{
"epoch": 1.17,
"learning_rate": 4.478095058370141e-05,
"loss": 3.163,
"step": 5400
},
{
"epoch": 1.17,
"learning_rate": 4.475955232627684e-05,
"loss": 3.2097,
"step": 5410
},
{
"epoch": 1.17,
"learning_rate": 4.4738115426466675e-05,
"loss": 3.2074,
"step": 5420
},
{
"epoch": 1.17,
"learning_rate": 4.4716639926193595e-05,
"loss": 3.2065,
"step": 5430
},
{
"epoch": 1.17,
"learning_rate": 4.4695125867455725e-05,
"loss": 3.1484,
"step": 5440
},
{
"epoch": 1.18,
"learning_rate": 4.467357329232663e-05,
"loss": 3.1814,
"step": 5450
},
{
"epoch": 1.18,
"learning_rate": 4.465198224295518e-05,
"loss": 3.0919,
"step": 5460
},
{
"epoch": 1.18,
"learning_rate": 4.4630352761565494e-05,
"loss": 3.1578,
"step": 5470
},
{
"epoch": 1.18,
"learning_rate": 4.4608684890456845e-05,
"loss": 3.1611,
"step": 5480
},
{
"epoch": 1.19,
"learning_rate": 4.458697867200359e-05,
"loss": 3.1136,
"step": 5490
},
{
"epoch": 1.19,
"learning_rate": 4.456523414865507e-05,
"loss": 3.2022,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 4.454345136293554e-05,
"loss": 3.1829,
"step": 5510
},
{
"epoch": 1.19,
"learning_rate": 4.45216303574441e-05,
"loss": 3.2048,
"step": 5520
},
{
"epoch": 1.19,
"learning_rate": 4.4499771174854554e-05,
"loss": 3.18,
"step": 5530
},
{
"epoch": 1.2,
"learning_rate": 4.44778738579154e-05,
"loss": 3.1876,
"step": 5540
},
{
"epoch": 1.2,
"learning_rate": 4.445593844944969e-05,
"loss": 3.1565,
"step": 5550
},
{
"epoch": 1.2,
"learning_rate": 4.4433964992355e-05,
"loss": 3.1507,
"step": 5560
},
{
"epoch": 1.2,
"learning_rate": 4.441195352960328e-05,
"loss": 3.1713,
"step": 5570
},
{
"epoch": 1.2,
"learning_rate": 4.4389904104240825e-05,
"loss": 3.1927,
"step": 5580
},
{
"epoch": 1.21,
"learning_rate": 4.436781675938817e-05,
"loss": 3.1955,
"step": 5590
},
{
"epoch": 1.21,
"learning_rate": 4.434569153824e-05,
"loss": 3.1942,
"step": 5600
},
{
"epoch": 1.21,
"learning_rate": 4.432352848406507e-05,
"loss": 3.1739,
"step": 5610
},
{
"epoch": 1.21,
"learning_rate": 4.430132764020614e-05,
"loss": 3.1967,
"step": 5620
},
{
"epoch": 1.22,
"learning_rate": 4.4279089050079845e-05,
"loss": 3.2066,
"step": 5630
},
{
"epoch": 1.22,
"learning_rate": 4.4256812757176655e-05,
"loss": 3.1784,
"step": 5640
},
{
"epoch": 1.22,
"learning_rate": 4.4234498805060783e-05,
"loss": 3.2184,
"step": 5650
},
{
"epoch": 1.22,
"learning_rate": 4.421214723737007e-05,
"loss": 3.1781,
"step": 5660
},
{
"epoch": 1.22,
"learning_rate": 4.418975809781593e-05,
"loss": 3.2037,
"step": 5670
},
{
"epoch": 1.23,
"learning_rate": 4.416733143018324e-05,
"loss": 3.1782,
"step": 5680
},
{
"epoch": 1.23,
"learning_rate": 4.4144867278330284e-05,
"loss": 3.1577,
"step": 5690
},
{
"epoch": 1.23,
"learning_rate": 4.4122365686188635e-05,
"loss": 3.2047,
"step": 5700
},
{
"epoch": 1.23,
"learning_rate": 4.409982669776312e-05,
"loss": 3.1932,
"step": 5710
},
{
"epoch": 1.23,
"learning_rate": 4.407725035713166e-05,
"loss": 3.2061,
"step": 5720
},
{
"epoch": 1.24,
"learning_rate": 4.405463670844523e-05,
"loss": 3.1846,
"step": 5730
},
{
"epoch": 1.24,
"learning_rate": 4.403198579592779e-05,
"loss": 3.2353,
"step": 5740
},
{
"epoch": 1.24,
"learning_rate": 4.400929766387615e-05,
"loss": 3.1599,
"step": 5750
},
{
"epoch": 1.24,
"learning_rate": 4.398657235665991e-05,
"loss": 3.1425,
"step": 5760
},
{
"epoch": 1.25,
"learning_rate": 4.396380991872139e-05,
"loss": 3.1801,
"step": 5770
},
{
"epoch": 1.25,
"learning_rate": 4.39410103945755e-05,
"loss": 3.1458,
"step": 5780
},
{
"epoch": 1.25,
"learning_rate": 4.3918173828809686e-05,
"loss": 3.1369,
"step": 5790
},
{
"epoch": 1.25,
"learning_rate": 4.3895300266083847e-05,
"loss": 3.1809,
"step": 5800
},
{
"epoch": 1.25,
"learning_rate": 4.387238975113022e-05,
"loss": 3.162,
"step": 5810
},
{
"epoch": 1.26,
"learning_rate": 4.3849442328753296e-05,
"loss": 3.2025,
"step": 5820
},
{
"epoch": 1.26,
"learning_rate": 4.3826458043829764e-05,
"loss": 3.2123,
"step": 5830
},
{
"epoch": 1.26,
"learning_rate": 4.3803436941308405e-05,
"loss": 3.1533,
"step": 5840
},
{
"epoch": 1.26,
"learning_rate": 4.3780379066209985e-05,
"loss": 3.1517,
"step": 5850
},
{
"epoch": 1.27,
"learning_rate": 4.375728446362719e-05,
"loss": 3.1786,
"step": 5860
},
{
"epoch": 1.27,
"learning_rate": 4.373415317872454e-05,
"loss": 3.1428,
"step": 5870
},
{
"epoch": 1.27,
"learning_rate": 4.371098525673826e-05,
"loss": 3.195,
"step": 5880
},
{
"epoch": 1.27,
"learning_rate": 4.3687780742976275e-05,
"loss": 3.1393,
"step": 5890
},
{
"epoch": 1.27,
"learning_rate": 4.366453968281803e-05,
"loss": 3.1745,
"step": 5900
},
{
"epoch": 1.28,
"learning_rate": 4.3641262121714455e-05,
"loss": 3.1386,
"step": 5910
},
{
"epoch": 1.28,
"learning_rate": 4.3617948105187864e-05,
"loss": 3.2105,
"step": 5920
},
{
"epoch": 1.28,
"learning_rate": 4.359459767883186e-05,
"loss": 3.1338,
"step": 5930
},
{
"epoch": 1.28,
"learning_rate": 4.357121088831124e-05,
"loss": 3.148,
"step": 5940
},
{
"epoch": 1.28,
"learning_rate": 4.3547787779361955e-05,
"loss": 3.1411,
"step": 5950
},
{
"epoch": 1.29,
"learning_rate": 4.352432839779093e-05,
"loss": 3.1104,
"step": 5960
},
{
"epoch": 1.29,
"learning_rate": 4.350083278947606e-05,
"loss": 3.1404,
"step": 5970
},
{
"epoch": 1.29,
"learning_rate": 4.3477301000366075e-05,
"loss": 3.1452,
"step": 5980
},
{
"epoch": 1.29,
"learning_rate": 4.3453733076480466e-05,
"loss": 3.1881,
"step": 5990
},
{
"epoch": 1.3,
"learning_rate": 4.343012906390937e-05,
"loss": 3.1552,
"step": 6000
},
{
"epoch": 1.3,
"learning_rate": 4.3406489008813535e-05,
"loss": 3.2142,
"step": 6010
},
{
"epoch": 1.3,
"learning_rate": 4.338281295742417e-05,
"loss": 3.1566,
"step": 6020
},
{
"epoch": 1.3,
"learning_rate": 4.3359100956042885e-05,
"loss": 3.1937,
"step": 6030
},
{
"epoch": 1.3,
"learning_rate": 4.3335353051041606e-05,
"loss": 3.2105,
"step": 6040
},
{
"epoch": 1.31,
"learning_rate": 4.331156928886245e-05,
"loss": 3.1797,
"step": 6050
},
{
"epoch": 1.31,
"learning_rate": 4.328774971601767e-05,
"loss": 3.1629,
"step": 6060
},
{
"epoch": 1.31,
"learning_rate": 4.326389437908956e-05,
"loss": 3.2062,
"step": 6070
},
{
"epoch": 1.31,
"learning_rate": 4.324000332473035e-05,
"loss": 3.2231,
"step": 6080
},
{
"epoch": 1.31,
"learning_rate": 4.3216076599662114e-05,
"loss": 3.1946,
"step": 6090
},
{
"epoch": 1.32,
"learning_rate": 4.319211425067668e-05,
"loss": 3.1879,
"step": 6100
},
{
"epoch": 1.32,
"learning_rate": 4.3168116324635556e-05,
"loss": 3.1352,
"step": 6110
},
{
"epoch": 1.32,
"learning_rate": 4.314408286846982e-05,
"loss": 3.1791,
"step": 6120
},
{
"epoch": 1.32,
"learning_rate": 4.3120013929180055e-05,
"loss": 3.1685,
"step": 6130
},
{
"epoch": 1.33,
"learning_rate": 4.309590955383619e-05,
"loss": 3.1246,
"step": 6140
},
{
"epoch": 1.33,
"learning_rate": 4.307176978957748e-05,
"loss": 3.1601,
"step": 6150
},
{
"epoch": 1.33,
"learning_rate": 4.30475946836124e-05,
"loss": 3.1144,
"step": 6160
},
{
"epoch": 1.33,
"learning_rate": 4.3023384283218525e-05,
"loss": 3.1496,
"step": 6170
},
{
"epoch": 1.33,
"learning_rate": 4.299913863574245e-05,
"loss": 3.2308,
"step": 6180
},
{
"epoch": 1.34,
"learning_rate": 4.297485778859971e-05,
"loss": 3.1532,
"step": 6190
},
{
"epoch": 1.34,
"learning_rate": 4.295054178927468e-05,
"loss": 3.1449,
"step": 6200
},
{
"epoch": 1.34,
"learning_rate": 4.2926190685320454e-05,
"loss": 3.1575,
"step": 6210
},
{
"epoch": 1.34,
"learning_rate": 4.290180452435881e-05,
"loss": 3.1471,
"step": 6220
},
{
"epoch": 1.34,
"learning_rate": 4.287738335408007e-05,
"loss": 3.1524,
"step": 6230
},
{
"epoch": 1.35,
"learning_rate": 4.285292722224302e-05,
"loss": 3.143,
"step": 6240
},
{
"epoch": 1.35,
"learning_rate": 4.282843617667481e-05,
"loss": 3.1605,
"step": 6250
},
{
"epoch": 1.35,
"learning_rate": 4.2803910265270905e-05,
"loss": 3.1333,
"step": 6260
},
{
"epoch": 1.35,
"learning_rate": 4.2779349535994907e-05,
"loss": 3.1821,
"step": 6270
},
{
"epoch": 1.36,
"learning_rate": 4.2754754036878534e-05,
"loss": 3.177,
"step": 6280
},
{
"epoch": 1.36,
"learning_rate": 4.273012381602151e-05,
"loss": 3.1561,
"step": 6290
},
{
"epoch": 1.36,
"learning_rate": 4.270545892159142e-05,
"loss": 3.1645,
"step": 6300
},
{
"epoch": 1.36,
"learning_rate": 4.2680759401823724e-05,
"loss": 3.1379,
"step": 6310
},
{
"epoch": 1.36,
"learning_rate": 4.265602530502153e-05,
"loss": 3.1522,
"step": 6320
},
{
"epoch": 1.37,
"learning_rate": 4.263125667955561e-05,
"loss": 3.144,
"step": 6330
},
{
"epoch": 1.37,
"learning_rate": 4.2606453573864236e-05,
"loss": 3.1868,
"step": 6340
},
{
"epoch": 1.37,
"learning_rate": 4.258161603645313e-05,
"loss": 3.1529,
"step": 6350
},
{
"epoch": 1.37,
"learning_rate": 4.255674411589534e-05,
"loss": 3.1846,
"step": 6360
},
{
"epoch": 1.38,
"learning_rate": 4.2531837860831154e-05,
"loss": 3.1615,
"step": 6370
},
{
"epoch": 1.38,
"learning_rate": 4.2506897319968e-05,
"loss": 3.1571,
"step": 6380
},
{
"epoch": 1.38,
"learning_rate": 4.2481922542080375e-05,
"loss": 3.1169,
"step": 6390
},
{
"epoch": 1.38,
"learning_rate": 4.245691357600971e-05,
"loss": 3.1826,
"step": 6400
},
{
"epoch": 1.38,
"learning_rate": 4.2431870470664314e-05,
"loss": 3.1751,
"step": 6410
},
{
"epoch": 1.39,
"learning_rate": 4.240679327501924e-05,
"loss": 3.1996,
"step": 6420
},
{
"epoch": 1.39,
"learning_rate": 4.238168203811623e-05,
"loss": 3.1304,
"step": 6430
},
{
"epoch": 1.39,
"learning_rate": 4.235653680906358e-05,
"loss": 3.1716,
"step": 6440
},
{
"epoch": 1.39,
"learning_rate": 4.233135763703607e-05,
"loss": 3.1825,
"step": 6450
},
{
"epoch": 1.39,
"learning_rate": 4.230614457127488e-05,
"loss": 3.1719,
"step": 6460
},
{
"epoch": 1.4,
"learning_rate": 4.228089766108742e-05,
"loss": 3.15,
"step": 6470
},
{
"epoch": 1.4,
"learning_rate": 4.225561695584733e-05,
"loss": 3.1698,
"step": 6480
},
{
"epoch": 1.4,
"learning_rate": 4.2230302504994355e-05,
"loss": 3.1423,
"step": 6490
},
{
"epoch": 1.4,
"learning_rate": 4.220495435803419e-05,
"loss": 3.2055,
"step": 6500
},
{
"epoch": 1.41,
"learning_rate": 4.217957256453844e-05,
"loss": 3.1394,
"step": 6510
},
{
"epoch": 1.41,
"learning_rate": 4.215415717414454e-05,
"loss": 3.1618,
"step": 6520
},
{
"epoch": 1.41,
"learning_rate": 4.2128708236555584e-05,
"loss": 3.1124,
"step": 6530
},
{
"epoch": 1.41,
"learning_rate": 4.2103225801540314e-05,
"loss": 3.1919,
"step": 6540
},
{
"epoch": 1.41,
"learning_rate": 4.207770991893294e-05,
"loss": 3.1607,
"step": 6550
},
{
"epoch": 1.42,
"learning_rate": 4.205216063863312e-05,
"loss": 3.1826,
"step": 6560
},
{
"epoch": 1.42,
"learning_rate": 4.2026578010605796e-05,
"loss": 3.1262,
"step": 6570
},
{
"epoch": 1.42,
"learning_rate": 4.2000962084881154e-05,
"loss": 3.1614,
"step": 6580
},
{
"epoch": 1.42,
"learning_rate": 4.197531291155447e-05,
"loss": 3.1091,
"step": 6590
},
{
"epoch": 1.42,
"learning_rate": 4.1949630540786055e-05,
"loss": 3.1554,
"step": 6600
},
{
"epoch": 1.43,
"learning_rate": 4.192391502280114e-05,
"loss": 3.1163,
"step": 6610
},
{
"epoch": 1.43,
"learning_rate": 4.1898166407889785e-05,
"loss": 3.1649,
"step": 6620
},
{
"epoch": 1.43,
"learning_rate": 4.187238474640675e-05,
"loss": 3.1235,
"step": 6630
},
{
"epoch": 1.43,
"learning_rate": 4.184657008877148e-05,
"loss": 3.1343,
"step": 6640
},
{
"epoch": 1.44,
"learning_rate": 4.1820722485467865e-05,
"loss": 3.1873,
"step": 6650
},
{
"epoch": 1.44,
"learning_rate": 4.179484198704431e-05,
"loss": 3.1574,
"step": 6660
},
{
"epoch": 1.44,
"learning_rate": 4.176892864411348e-05,
"loss": 3.1684,
"step": 6670
},
{
"epoch": 1.44,
"learning_rate": 4.174298250735232e-05,
"loss": 3.2082,
"step": 6680
},
{
"epoch": 1.44,
"learning_rate": 4.171700362750188e-05,
"loss": 3.1431,
"step": 6690
},
{
"epoch": 1.45,
"learning_rate": 4.169099205536725e-05,
"loss": 3.116,
"step": 6700
},
{
"epoch": 1.45,
"learning_rate": 4.166494784181747e-05,
"loss": 3.1468,
"step": 6710
},
{
"epoch": 1.45,
"learning_rate": 4.163887103778539e-05,
"loss": 3.1494,
"step": 6720
},
{
"epoch": 1.45,
"learning_rate": 4.161276169426762e-05,
"loss": 3.1662,
"step": 6730
},
{
"epoch": 1.46,
"learning_rate": 4.158661986232437e-05,
"loss": 3.1573,
"step": 6740
},
{
"epoch": 1.46,
"learning_rate": 4.1560445593079424e-05,
"loss": 3.1701,
"step": 6750
},
{
"epoch": 1.46,
"learning_rate": 4.1534238937719984e-05,
"loss": 3.1408,
"step": 6760
},
{
"epoch": 1.46,
"learning_rate": 4.150799994749658e-05,
"loss": 3.1655,
"step": 6770
},
{
"epoch": 1.46,
"learning_rate": 4.148172867372299e-05,
"loss": 3.1663,
"step": 6780
},
{
"epoch": 1.47,
"learning_rate": 4.145542516777612e-05,
"loss": 3.1723,
"step": 6790
},
{
"epoch": 1.47,
"learning_rate": 4.1429089481095906e-05,
"loss": 3.1329,
"step": 6800
},
{
"epoch": 1.47,
"learning_rate": 4.140272166518523e-05,
"loss": 3.133,
"step": 6810
},
{
"epoch": 1.47,
"learning_rate": 4.13763217716098e-05,
"loss": 3.1302,
"step": 6820
},
{
"epoch": 1.47,
"learning_rate": 4.134988985199806e-05,
"loss": 3.118,
"step": 6830
},
{
"epoch": 1.48,
"learning_rate": 4.132342595804108e-05,
"loss": 3.164,
"step": 6840
},
{
"epoch": 1.48,
"learning_rate": 4.129693014149245e-05,
"loss": 3.1411,
"step": 6850
},
{
"epoch": 1.48,
"learning_rate": 4.127040245416821e-05,
"loss": 3.1414,
"step": 6860
},
{
"epoch": 1.48,
"learning_rate": 4.124384294794672e-05,
"loss": 3.1436,
"step": 6870
},
{
"epoch": 1.49,
"learning_rate": 4.121725167476855e-05,
"loss": 3.2189,
"step": 6880
},
{
"epoch": 1.49,
"learning_rate": 4.1190628686636425e-05,
"loss": 3.1578,
"step": 6890
},
{
"epoch": 1.49,
"learning_rate": 4.116397403561507e-05,
"loss": 3.1702,
"step": 6900
},
{
"epoch": 1.49,
"learning_rate": 4.113728777383112e-05,
"loss": 3.169,
"step": 6910
},
{
"epoch": 1.49,
"learning_rate": 4.111056995347308e-05,
"loss": 3.1692,
"step": 6920
},
{
"epoch": 1.5,
"learning_rate": 4.1083820626791116e-05,
"loss": 3.168,
"step": 6930
},
{
"epoch": 1.5,
"learning_rate": 4.105703984609702e-05,
"loss": 3.1242,
"step": 6940
},
{
"epoch": 1.5,
"learning_rate": 4.1030227663764135e-05,
"loss": 3.2433,
"step": 6950
},
{
"epoch": 1.5,
"learning_rate": 4.100338413222716e-05,
"loss": 3.187,
"step": 6960
},
{
"epoch": 1.5,
"learning_rate": 4.0976509303982135e-05,
"loss": 3.1663,
"step": 6970
},
{
"epoch": 1.51,
"learning_rate": 4.09496032315863e-05,
"loss": 3.1137,
"step": 6980
},
{
"epoch": 1.51,
"learning_rate": 4.0922665967658006e-05,
"loss": 3.1161,
"step": 6990
},
{
"epoch": 1.51,
"learning_rate": 4.089569756487657e-05,
"loss": 3.1972,
"step": 7000
},
{
"epoch": 1.51,
"learning_rate": 4.086869807598223e-05,
"loss": 3.1001,
"step": 7010
},
{
"epoch": 1.52,
"learning_rate": 4.084166755377603e-05,
"loss": 3.1692,
"step": 7020
},
{
"epoch": 1.52,
"learning_rate": 4.081460605111966e-05,
"loss": 3.1473,
"step": 7030
},
{
"epoch": 1.52,
"learning_rate": 4.078751362093545e-05,
"loss": 3.1822,
"step": 7040
},
{
"epoch": 1.52,
"learning_rate": 4.076039031620618e-05,
"loss": 3.1269,
"step": 7050
},
{
"epoch": 1.52,
"learning_rate": 4.0733236189975e-05,
"loss": 3.1275,
"step": 7060
},
{
"epoch": 1.53,
"learning_rate": 4.070605129534536e-05,
"loss": 3.1044,
"step": 7070
},
{
"epoch": 1.53,
"learning_rate": 4.067883568548088e-05,
"loss": 3.1716,
"step": 7080
},
{
"epoch": 1.53,
"learning_rate": 4.065158941360523e-05,
"loss": 3.1244,
"step": 7090
},
{
"epoch": 1.53,
"learning_rate": 4.062431253300205e-05,
"loss": 3.1419,
"step": 7100
},
{
"epoch": 1.53,
"learning_rate": 4.059700509701485e-05,
"loss": 3.1206,
"step": 7110
},
{
"epoch": 1.54,
"learning_rate": 4.05696671590469e-05,
"loss": 3.0614,
"step": 7120
},
{
"epoch": 1.54,
"learning_rate": 4.054229877256108e-05,
"loss": 3.1817,
"step": 7130
},
{
"epoch": 1.54,
"learning_rate": 4.0514899991079876e-05,
"loss": 3.1441,
"step": 7140
},
{
"epoch": 1.54,
"learning_rate": 4.048747086818516e-05,
"loss": 3.148,
"step": 7150
},
{
"epoch": 1.55,
"learning_rate": 4.046001145751818e-05,
"loss": 3.1507,
"step": 7160
},
{
"epoch": 1.55,
"learning_rate": 4.043252181277939e-05,
"loss": 3.1917,
"step": 7170
},
{
"epoch": 1.55,
"learning_rate": 4.040500198772838e-05,
"loss": 3.1355,
"step": 7180
},
{
"epoch": 1.55,
"learning_rate": 4.037745203618377e-05,
"loss": 3.1637,
"step": 7190
},
{
"epoch": 1.55,
"learning_rate": 4.0349872012023085e-05,
"loss": 3.1545,
"step": 7200
},
{
"epoch": 1.56,
"learning_rate": 4.032226196918267e-05,
"loss": 3.1878,
"step": 7210
},
{
"epoch": 1.56,
"learning_rate": 4.029462196165756e-05,
"loss": 3.1394,
"step": 7220
},
{
"epoch": 1.56,
"learning_rate": 4.026695204350142e-05,
"loss": 3.1647,
"step": 7230
},
{
"epoch": 1.56,
"learning_rate": 4.023925226882636e-05,
"loss": 3.1424,
"step": 7240
},
{
"epoch": 1.57,
"learning_rate": 4.0211522691802924e-05,
"loss": 3.0862,
"step": 7250
},
{
"epoch": 1.57,
"learning_rate": 4.0183763366659934e-05,
"loss": 3.1946,
"step": 7260
},
{
"epoch": 1.57,
"learning_rate": 4.0155974347684353e-05,
"loss": 3.1213,
"step": 7270
},
{
"epoch": 1.57,
"learning_rate": 4.012815568922125e-05,
"loss": 3.1879,
"step": 7280
},
{
"epoch": 1.57,
"learning_rate": 4.010030744567365e-05,
"loss": 3.1317,
"step": 7290
},
{
"epoch": 1.58,
"learning_rate": 4.007242967150242e-05,
"loss": 3.1562,
"step": 7300
},
{
"epoch": 1.58,
"learning_rate": 4.0044522421226184e-05,
"loss": 3.137,
"step": 7310
},
{
"epoch": 1.58,
"learning_rate": 4.001658574942123e-05,
"loss": 3.1713,
"step": 7320
},
{
"epoch": 1.58,
"learning_rate": 3.998861971072136e-05,
"loss": 3.182,
"step": 7330
},
{
"epoch": 1.58,
"learning_rate": 3.996062435981782e-05,
"loss": 3.1464,
"step": 7340
},
{
"epoch": 1.59,
"learning_rate": 3.993259975145917e-05,
"loss": 3.1696,
"step": 7350
},
{
"epoch": 1.59,
"learning_rate": 3.99045459404512e-05,
"loss": 3.1995,
"step": 7360
},
{
"epoch": 1.59,
"learning_rate": 3.9876462981656806e-05,
"loss": 3.1175,
"step": 7370
},
{
"epoch": 1.59,
"learning_rate": 3.984835092999586e-05,
"loss": 3.1136,
"step": 7380
},
{
"epoch": 1.6,
"learning_rate": 3.982020984044517e-05,
"loss": 3.1157,
"step": 7390
},
{
"epoch": 1.6,
"learning_rate": 3.979203976803832e-05,
"loss": 3.1381,
"step": 7400
},
{
"epoch": 1.6,
"learning_rate": 3.976384076786554e-05,
"loss": 3.1718,
"step": 7410
},
{
"epoch": 1.6,
"learning_rate": 3.973561289507366e-05,
"loss": 3.1321,
"step": 7420
},
{
"epoch": 1.6,
"learning_rate": 3.9707356204865996e-05,
"loss": 3.137,
"step": 7430
},
{
"epoch": 1.61,
"learning_rate": 3.967907075250219e-05,
"loss": 3.1893,
"step": 7440
},
{
"epoch": 1.61,
"learning_rate": 3.965075659329813e-05,
"loss": 3.1326,
"step": 7450
},
{
"epoch": 1.61,
"learning_rate": 3.9622413782625854e-05,
"loss": 3.1343,
"step": 7460
},
{
"epoch": 1.61,
"learning_rate": 3.959404237591344e-05,
"loss": 3.1807,
"step": 7470
},
{
"epoch": 1.61,
"learning_rate": 3.9565642428644866e-05,
"loss": 3.1216,
"step": 7480
},
{
"epoch": 1.62,
"learning_rate": 3.953721399635995e-05,
"loss": 3.1246,
"step": 7490
},
{
"epoch": 1.62,
"learning_rate": 3.9508757134654196e-05,
"loss": 3.1527,
"step": 7500
},
{
"epoch": 1.62,
"learning_rate": 3.9480271899178724e-05,
"loss": 3.124,
"step": 7510
},
{
"epoch": 1.62,
"learning_rate": 3.945175834564011e-05,
"loss": 3.1283,
"step": 7520
},
{
"epoch": 1.63,
"learning_rate": 3.942321652980037e-05,
"loss": 3.1565,
"step": 7530
},
{
"epoch": 1.63,
"learning_rate": 3.939464650747672e-05,
"loss": 3.1486,
"step": 7540
},
{
"epoch": 1.63,
"learning_rate": 3.9366048334541585e-05,
"loss": 3.1738,
"step": 7550
},
{
"epoch": 1.63,
"learning_rate": 3.933742206692243e-05,
"loss": 3.1078,
"step": 7560
},
{
"epoch": 1.63,
"learning_rate": 3.930876776060164e-05,
"loss": 3.1508,
"step": 7570
},
{
"epoch": 1.64,
"learning_rate": 3.928008547161648e-05,
"loss": 3.1255,
"step": 7580
},
{
"epoch": 1.64,
"learning_rate": 3.925137525605891e-05,
"loss": 3.1683,
"step": 7590
},
{
"epoch": 1.64,
"learning_rate": 3.922263717007549e-05,
"loss": 3.1328,
"step": 7600
},
{
"epoch": 1.64,
"learning_rate": 3.919387126986731e-05,
"loss": 3.1678,
"step": 7610
},
{
"epoch": 1.65,
"learning_rate": 3.916507761168984e-05,
"loss": 3.1351,
"step": 7620
},
{
"epoch": 1.65,
"learning_rate": 3.913625625185285e-05,
"loss": 3.1499,
"step": 7630
},
{
"epoch": 1.65,
"learning_rate": 3.910740724672027e-05,
"loss": 3.2316,
"step": 7640
},
{
"epoch": 1.65,
"learning_rate": 3.90785306527101e-05,
"loss": 3.0708,
"step": 7650
},
{
"epoch": 1.65,
"learning_rate": 3.904962652629428e-05,
"loss": 3.1177,
"step": 7660
},
{
"epoch": 1.66,
"learning_rate": 3.9020694923998624e-05,
"loss": 3.1329,
"step": 7670
},
{
"epoch": 1.66,
"learning_rate": 3.899173590240264e-05,
"loss": 3.0913,
"step": 7680
},
{
"epoch": 1.66,
"learning_rate": 3.8962749518139486e-05,
"loss": 3.1613,
"step": 7690
},
{
"epoch": 1.66,
"learning_rate": 3.893373582789582e-05,
"loss": 3.1215,
"step": 7700
},
{
"epoch": 1.66,
"learning_rate": 3.890469488841171e-05,
"loss": 3.1652,
"step": 7710
},
{
"epoch": 1.67,
"learning_rate": 3.88756267564805e-05,
"loss": 3.1852,
"step": 7720
},
{
"epoch": 1.67,
"learning_rate": 3.884653148894871e-05,
"loss": 3.1674,
"step": 7730
},
{
"epoch": 1.67,
"learning_rate": 3.881740914271596e-05,
"loss": 3.1452,
"step": 7740
},
{
"epoch": 1.67,
"learning_rate": 3.878825977473478e-05,
"loss": 3.1016,
"step": 7750
},
{
"epoch": 1.68,
"learning_rate": 3.8759083442010584e-05,
"loss": 3.1139,
"step": 7760
},
{
"epoch": 1.68,
"learning_rate": 3.872988020160149e-05,
"loss": 3.1258,
"step": 7770
},
{
"epoch": 1.68,
"learning_rate": 3.870065011061825e-05,
"loss": 3.1106,
"step": 7780
},
{
"epoch": 1.68,
"learning_rate": 3.867139322622413e-05,
"loss": 3.1516,
"step": 7790
},
{
"epoch": 1.68,
"learning_rate": 3.864210960563478e-05,
"loss": 3.1169,
"step": 7800
},
{
"epoch": 1.69,
"learning_rate": 3.8612799306118156e-05,
"loss": 3.1084,
"step": 7810
},
{
"epoch": 1.69,
"learning_rate": 3.8583462384994374e-05,
"loss": 3.1573,
"step": 7820
},
{
"epoch": 1.69,
"learning_rate": 3.85540988996356e-05,
"loss": 3.1087,
"step": 7830
},
{
"epoch": 1.69,
"learning_rate": 3.852470890746599e-05,
"loss": 3.1467,
"step": 7840
},
{
"epoch": 1.69,
"learning_rate": 3.849529246596151e-05,
"loss": 3.1378,
"step": 7850
},
{
"epoch": 1.7,
"learning_rate": 3.846584963264983e-05,
"loss": 3.1572,
"step": 7860
},
{
"epoch": 1.7,
"learning_rate": 3.843638046511028e-05,
"loss": 3.1425,
"step": 7870
},
{
"epoch": 1.7,
"learning_rate": 3.840688502097365e-05,
"loss": 3.1609,
"step": 7880
},
{
"epoch": 1.7,
"learning_rate": 3.8377363357922156e-05,
"loss": 3.0867,
"step": 7890
},
{
"epoch": 1.71,
"learning_rate": 3.834781553368924e-05,
"loss": 3.1533,
"step": 7900
},
{
"epoch": 1.71,
"learning_rate": 3.831824160605955e-05,
"loss": 3.1032,
"step": 7910
},
{
"epoch": 1.71,
"learning_rate": 3.828864163286875e-05,
"loss": 3.1066,
"step": 7920
},
{
"epoch": 1.71,
"learning_rate": 3.8259015672003464e-05,
"loss": 3.1348,
"step": 7930
},
{
"epoch": 1.71,
"learning_rate": 3.822936378140113e-05,
"loss": 3.2036,
"step": 7940
},
{
"epoch": 1.72,
"learning_rate": 3.8199686019049894e-05,
"loss": 3.181,
"step": 7950
},
{
"epoch": 1.72,
"learning_rate": 3.816998244298849e-05,
"loss": 3.1022,
"step": 7960
},
{
"epoch": 1.72,
"learning_rate": 3.814025311130614e-05,
"loss": 3.1287,
"step": 7970
},
{
"epoch": 1.72,
"learning_rate": 3.8110498082142445e-05,
"loss": 3.1466,
"step": 7980
},
{
"epoch": 1.72,
"learning_rate": 3.808071741368723e-05,
"loss": 3.1357,
"step": 7990
},
{
"epoch": 1.73,
"learning_rate": 3.8050911164180507e-05,
"loss": 3.1281,
"step": 8000
},
{
"epoch": 1.73,
"learning_rate": 3.802107939191228e-05,
"loss": 3.1201,
"step": 8010
},
{
"epoch": 1.73,
"learning_rate": 3.7991222155222484e-05,
"loss": 3.1409,
"step": 8020
},
{
"epoch": 1.73,
"learning_rate": 3.796133951250083e-05,
"loss": 3.1032,
"step": 8030
},
{
"epoch": 1.74,
"learning_rate": 3.793143152218673e-05,
"loss": 3.1526,
"step": 8040
},
{
"epoch": 1.74,
"learning_rate": 3.790149824276919e-05,
"loss": 3.1561,
"step": 8050
},
{
"epoch": 1.74,
"learning_rate": 3.787153973278662e-05,
"loss": 3.1016,
"step": 8060
},
{
"epoch": 1.74,
"learning_rate": 3.784155605082681e-05,
"loss": 3.1152,
"step": 8070
},
{
"epoch": 1.74,
"learning_rate": 3.781154725552677e-05,
"loss": 3.1269,
"step": 8080
},
{
"epoch": 1.75,
"learning_rate": 3.778151340557261e-05,
"loss": 3.1753,
"step": 8090
},
{
"epoch": 1.75,
"learning_rate": 3.775145455969946e-05,
"loss": 3.1708,
"step": 8100
},
{
"epoch": 1.75,
"learning_rate": 3.7721370776691305e-05,
"loss": 3.1453,
"step": 8110
},
{
"epoch": 1.75,
"learning_rate": 3.769126211538092e-05,
"loss": 3.1866,
"step": 8120
},
{
"epoch": 1.76,
"learning_rate": 3.7661128634649737e-05,
"loss": 3.1268,
"step": 8130
},
{
"epoch": 1.76,
"learning_rate": 3.763097039342768e-05,
"loss": 3.1154,
"step": 8140
},
{
"epoch": 1.76,
"learning_rate": 3.760078745069316e-05,
"loss": 3.1447,
"step": 8150
},
{
"epoch": 1.76,
"learning_rate": 3.757057986547285e-05,
"loss": 3.1837,
"step": 8160
},
{
"epoch": 1.76,
"learning_rate": 3.754034769684164e-05,
"loss": 3.1581,
"step": 8170
},
{
"epoch": 1.77,
"learning_rate": 3.751009100392247e-05,
"loss": 3.126,
"step": 8180
},
{
"epoch": 1.77,
"learning_rate": 3.747980984588626e-05,
"loss": 3.0992,
"step": 8190
},
{
"epoch": 1.77,
"learning_rate": 3.744950428195178e-05,
"loss": 3.1455,
"step": 8200
},
{
"epoch": 1.77,
"learning_rate": 3.7419174371385504e-05,
"loss": 3.128,
"step": 8210
},
{
"epoch": 1.77,
"learning_rate": 3.7388820173501545e-05,
"loss": 3.0823,
"step": 8220
},
{
"epoch": 1.78,
"learning_rate": 3.735844174766149e-05,
"loss": 3.1004,
"step": 8230
},
{
"epoch": 1.78,
"learning_rate": 3.732803915327434e-05,
"loss": 3.1647,
"step": 8240
},
{
"epoch": 1.78,
"learning_rate": 3.729761244979631e-05,
"loss": 3.1464,
"step": 8250
},
{
"epoch": 1.78,
"learning_rate": 3.726716169673082e-05,
"loss": 3.1272,
"step": 8260
},
{
"epoch": 1.79,
"learning_rate": 3.723668695362827e-05,
"loss": 3.1407,
"step": 8270
},
{
"epoch": 1.79,
"learning_rate": 3.720618828008604e-05,
"loss": 3.1695,
"step": 8280
},
{
"epoch": 1.79,
"learning_rate": 3.717566573574822e-05,
"loss": 3.1004,
"step": 8290
},
{
"epoch": 1.79,
"learning_rate": 3.7145119380305674e-05,
"loss": 3.1826,
"step": 8300
},
{
"epoch": 1.79,
"learning_rate": 3.7114549273495764e-05,
"loss": 3.1372,
"step": 8310
},
{
"epoch": 1.8,
"learning_rate": 3.708395547510234e-05,
"loss": 3.1596,
"step": 8320
},
{
"epoch": 1.8,
"learning_rate": 3.7053338044955566e-05,
"loss": 3.183,
"step": 8330
},
{
"epoch": 1.8,
"learning_rate": 3.702269704293182e-05,
"loss": 3.1429,
"step": 8340
},
{
"epoch": 1.8,
"learning_rate": 3.69920325289536e-05,
"loss": 3.1234,
"step": 8350
},
{
"epoch": 1.8,
"learning_rate": 3.6961344562989354e-05,
"loss": 3.1404,
"step": 8360
},
{
"epoch": 1.81,
"learning_rate": 3.6930633205053414e-05,
"loss": 3.1519,
"step": 8370
},
{
"epoch": 1.81,
"learning_rate": 3.6899898515205856e-05,
"loss": 3.174,
"step": 8380
},
{
"epoch": 1.81,
"learning_rate": 3.6869140553552374e-05,
"loss": 3.147,
"step": 8390
},
{
"epoch": 1.81,
"learning_rate": 3.683835938024418e-05,
"loss": 3.1018,
"step": 8400
},
{
"epoch": 1.82,
"learning_rate": 3.680755505547788e-05,
"loss": 3.1384,
"step": 8410
},
{
"epoch": 1.82,
"learning_rate": 3.677672763949536e-05,
"loss": 3.0691,
"step": 8420
},
{
"epoch": 1.82,
"learning_rate": 3.674587719258365e-05,
"loss": 3.1424,
"step": 8430
},
{
"epoch": 1.82,
"learning_rate": 3.671500377507482e-05,
"loss": 3.144,
"step": 8440
},
{
"epoch": 1.82,
"learning_rate": 3.668410744734589e-05,
"loss": 3.1379,
"step": 8450
},
{
"epoch": 1.83,
"learning_rate": 3.665318826981867e-05,
"loss": 3.1232,
"step": 8460
},
{
"epoch": 1.83,
"learning_rate": 3.662224630295963e-05,
"loss": 3.12,
"step": 8470
},
{
"epoch": 1.83,
"learning_rate": 3.659128160727983e-05,
"loss": 3.1597,
"step": 8480
},
{
"epoch": 1.83,
"learning_rate": 3.656029424333479e-05,
"loss": 3.1177,
"step": 8490
},
{
"epoch": 1.84,
"learning_rate": 3.652928427172434e-05,
"loss": 3.156,
"step": 8500
},
{
"epoch": 1.84,
"learning_rate": 3.649825175309253e-05,
"loss": 3.128,
"step": 8510
},
{
"epoch": 1.84,
"learning_rate": 3.646719674812751e-05,
"loss": 3.1341,
"step": 8520
},
{
"epoch": 1.84,
"learning_rate": 3.643611931756139e-05,
"loss": 3.1123,
"step": 8530
},
{
"epoch": 1.84,
"learning_rate": 3.640501952217015e-05,
"loss": 3.1392,
"step": 8540
},
{
"epoch": 1.85,
"learning_rate": 3.637389742277348e-05,
"loss": 3.132,
"step": 8550
},
{
"epoch": 1.85,
"learning_rate": 3.6342753080234754e-05,
"loss": 3.0704,
"step": 8560
},
{
"epoch": 1.85,
"learning_rate": 3.6311586555460776e-05,
"loss": 3.1512,
"step": 8570
},
{
"epoch": 1.85,
"learning_rate": 3.6280397909401756e-05,
"loss": 3.081,
"step": 8580
},
{
"epoch": 1.85,
"learning_rate": 3.624918720305117e-05,
"loss": 3.1163,
"step": 8590
},
{
"epoch": 1.86,
"learning_rate": 3.621795449744562e-05,
"loss": 3.096,
"step": 8600
},
{
"epoch": 1.86,
"learning_rate": 3.6186699853664755e-05,
"loss": 3.0679,
"step": 8610
},
{
"epoch": 1.86,
"learning_rate": 3.615542333283112e-05,
"loss": 3.1313,
"step": 8620
},
{
"epoch": 1.86,
"learning_rate": 3.6124124996110015e-05,
"loss": 3.0716,
"step": 8630
},
{
"epoch": 1.87,
"learning_rate": 3.609280490470944e-05,
"loss": 3.0907,
"step": 8640
},
{
"epoch": 1.87,
"learning_rate": 3.6061463119879915e-05,
"loss": 3.136,
"step": 8650
},
{
"epoch": 1.87,
"learning_rate": 3.60300997029144e-05,
"loss": 3.1074,
"step": 8660
},
{
"epoch": 1.87,
"learning_rate": 3.5998714715148165e-05,
"loss": 3.0909,
"step": 8670
},
{
"epoch": 1.87,
"learning_rate": 3.596730821795863e-05,
"loss": 3.0746,
"step": 8680
},
{
"epoch": 1.88,
"learning_rate": 3.5935880272765325e-05,
"loss": 3.1502,
"step": 8690
},
{
"epoch": 1.88,
"learning_rate": 3.590443094102969e-05,
"loss": 3.1832,
"step": 8700
},
{
"epoch": 1.88,
"learning_rate": 3.5872960284255e-05,
"loss": 3.1386,
"step": 8710
},
{
"epoch": 1.88,
"learning_rate": 3.584146836398624e-05,
"loss": 3.1602,
"step": 8720
},
{
"epoch": 1.88,
"learning_rate": 3.5809955241809964e-05,
"loss": 3.1216,
"step": 8730
},
{
"epoch": 1.89,
"learning_rate": 3.5778420979354216e-05,
"loss": 3.1161,
"step": 8740
},
{
"epoch": 1.89,
"learning_rate": 3.5746865638288344e-05,
"loss": 3.1314,
"step": 8750
},
{
"epoch": 1.89,
"learning_rate": 3.571528928032296e-05,
"loss": 3.0919,
"step": 8760
},
{
"epoch": 1.89,
"learning_rate": 3.5683691967209745e-05,
"loss": 3.1507,
"step": 8770
},
{
"epoch": 1.9,
"learning_rate": 3.565207376074138e-05,
"loss": 3.1514,
"step": 8780
},
{
"epoch": 1.9,
"learning_rate": 3.562043472275139e-05,
"loss": 3.1263,
"step": 8790
},
{
"epoch": 1.9,
"learning_rate": 3.558877491511405e-05,
"loss": 3.1518,
"step": 8800
},
{
"epoch": 1.9,
"learning_rate": 3.555709439974424e-05,
"loss": 3.1468,
"step": 8810
},
{
"epoch": 1.9,
"learning_rate": 3.552539323859736e-05,
"loss": 3.1016,
"step": 8820
},
{
"epoch": 1.91,
"learning_rate": 3.549367149366916e-05,
"loss": 3.1782,
"step": 8830
},
{
"epoch": 1.91,
"learning_rate": 3.5461929226995675e-05,
"loss": 3.1095,
"step": 8840
},
{
"epoch": 1.91,
"learning_rate": 3.5430166500653025e-05,
"loss": 3.0431,
"step": 8850
},
{
"epoch": 1.91,
"learning_rate": 3.5398383376757385e-05,
"loss": 3.1664,
"step": 8860
},
{
"epoch": 1.91,
"learning_rate": 3.5366579917464804e-05,
"loss": 3.1158,
"step": 8870
},
{
"epoch": 1.92,
"learning_rate": 3.533475618497111e-05,
"loss": 3.071,
"step": 8880
},
{
"epoch": 1.92,
"learning_rate": 3.5302912241511756e-05,
"loss": 3.1498,
"step": 8890
},
{
"epoch": 1.92,
"learning_rate": 3.5271048149361744e-05,
"loss": 3.0992,
"step": 8900
},
{
"epoch": 1.92,
"learning_rate": 3.523916397083546e-05,
"loss": 3.1064,
"step": 8910
},
{
"epoch": 1.93,
"learning_rate": 3.520725976828658e-05,
"loss": 3.1639,
"step": 8920
},
{
"epoch": 1.93,
"learning_rate": 3.5175335604107934e-05,
"loss": 3.1616,
"step": 8930
},
{
"epoch": 1.93,
"learning_rate": 3.5143391540731415e-05,
"loss": 3.1242,
"step": 8940
},
{
"epoch": 1.93,
"learning_rate": 3.5111427640627794e-05,
"loss": 3.075,
"step": 8950
},
{
"epoch": 1.93,
"learning_rate": 3.507944396630666e-05,
"loss": 3.0899,
"step": 8960
},
{
"epoch": 1.94,
"learning_rate": 3.504744058031625e-05,
"loss": 3.1955,
"step": 8970
},
{
"epoch": 1.94,
"learning_rate": 3.501541754524339e-05,
"loss": 3.1204,
"step": 8980
},
{
"epoch": 1.94,
"learning_rate": 3.49833749237133e-05,
"loss": 3.1625,
"step": 8990
},
{
"epoch": 1.94,
"learning_rate": 3.4951312778389504e-05,
"loss": 3.1284,
"step": 9000
},
{
"epoch": 1.95,
"learning_rate": 3.4919231171973724e-05,
"loss": 3.0868,
"step": 9010
},
{
"epoch": 1.95,
"learning_rate": 3.488713016720573e-05,
"loss": 3.1107,
"step": 9020
},
{
"epoch": 1.95,
"learning_rate": 3.4855009826863225e-05,
"loss": 3.1362,
"step": 9030
},
{
"epoch": 1.95,
"learning_rate": 3.482287021376173e-05,
"loss": 3.0686,
"step": 9040
},
{
"epoch": 1.95,
"learning_rate": 3.479071139075446e-05,
"loss": 3.1225,
"step": 9050
},
{
"epoch": 1.96,
"learning_rate": 3.4758533420732195e-05,
"loss": 3.156,
"step": 9060
},
{
"epoch": 1.96,
"learning_rate": 3.472633636662316e-05,
"loss": 3.1497,
"step": 9070
},
{
"epoch": 1.96,
"learning_rate": 3.469412029139289e-05,
"loss": 3.0961,
"step": 9080
},
{
"epoch": 1.96,
"learning_rate": 3.466188525804414e-05,
"loss": 3.0989,
"step": 9090
},
{
"epoch": 1.96,
"learning_rate": 3.462963132961672e-05,
"loss": 3.1339,
"step": 9100
},
{
"epoch": 1.97,
"learning_rate": 3.459735856918741e-05,
"loss": 3.1381,
"step": 9110
},
{
"epoch": 1.97,
"learning_rate": 3.45650670398698e-05,
"loss": 3.104,
"step": 9120
},
{
"epoch": 1.97,
"learning_rate": 3.453275680481419e-05,
"loss": 3.1142,
"step": 9130
},
{
"epoch": 1.97,
"learning_rate": 3.450042792720748e-05,
"loss": 3.1381,
"step": 9140
},
{
"epoch": 1.98,
"learning_rate": 3.4468080470273e-05,
"loss": 3.0822,
"step": 9150
},
{
"epoch": 1.98,
"learning_rate": 3.443571449727042e-05,
"loss": 3.1343,
"step": 9160
},
{
"epoch": 1.98,
"learning_rate": 3.440333007149565e-05,
"loss": 3.1531,
"step": 9170
},
{
"epoch": 1.98,
"learning_rate": 3.4370927256280654e-05,
"loss": 3.0654,
"step": 9180
},
{
"epoch": 1.98,
"learning_rate": 3.433850611499336e-05,
"loss": 3.1617,
"step": 9190
},
{
"epoch": 1.99,
"learning_rate": 3.4306066711037566e-05,
"loss": 3.1418,
"step": 9200
},
{
"epoch": 1.99,
"learning_rate": 3.427360910785275e-05,
"loss": 3.0834,
"step": 9210
},
{
"epoch": 1.99,
"learning_rate": 3.424113336891399e-05,
"loss": 3.1146,
"step": 9220
},
{
"epoch": 1.99,
"learning_rate": 3.420863955773184e-05,
"loss": 3.1358,
"step": 9230
},
{
"epoch": 1.99,
"learning_rate": 3.4176127737852196e-05,
"loss": 3.1704,
"step": 9240
},
{
"epoch": 2.0,
"learning_rate": 3.4143597972856176e-05,
"loss": 3.1462,
"step": 9250
},
{
"epoch": 2.0,
"learning_rate": 3.411105032635996e-05,
"loss": 3.176,
"step": 9260
},
{
"epoch": 2.0,
"eval_loss": 3.116755962371826,
"eval_runtime": 192.2124,
"eval_samples_per_second": 771.126,
"eval_steps_per_second": 24.098,
"step": 9264
},
{
"epoch": 2.0,
"learning_rate": 3.407848486201474e-05,
"loss": 3.1164,
"step": 9270
},
{
"epoch": 2.0,
"learning_rate": 3.404590164350654e-05,
"loss": 3.1071,
"step": 9280
},
{
"epoch": 2.01,
"learning_rate": 3.401330073455609e-05,
"loss": 3.104,
"step": 9290
},
{
"epoch": 2.01,
"learning_rate": 3.3980682198918724e-05,
"loss": 3.1238,
"step": 9300
},
{
"epoch": 2.01,
"learning_rate": 3.394804610038425e-05,
"loss": 3.1772,
"step": 9310
},
{
"epoch": 2.01,
"learning_rate": 3.391539250277683e-05,
"loss": 3.0895,
"step": 9320
},
{
"epoch": 2.01,
"learning_rate": 3.3882721469954836e-05,
"loss": 3.1571,
"step": 9330
},
{
"epoch": 2.02,
"learning_rate": 3.385003306581074e-05,
"loss": 3.0622,
"step": 9340
},
{
"epoch": 2.02,
"learning_rate": 3.381732735427098e-05,
"loss": 3.0843,
"step": 9350
},
{
"epoch": 2.02,
"learning_rate": 3.378460439929585e-05,
"loss": 3.1206,
"step": 9360
},
{
"epoch": 2.02,
"learning_rate": 3.3751864264879365e-05,
"loss": 3.1425,
"step": 9370
},
{
"epoch": 2.03,
"learning_rate": 3.371910701504913e-05,
"loss": 3.1208,
"step": 9380
},
{
"epoch": 2.03,
"learning_rate": 3.368633271386624e-05,
"loss": 3.1281,
"step": 9390
},
{
"epoch": 2.03,
"learning_rate": 3.36535414254251e-05,
"loss": 3.11,
"step": 9400
},
{
"epoch": 2.03,
"learning_rate": 3.3620733213853375e-05,
"loss": 3.1245,
"step": 9410
},
{
"epoch": 2.03,
"learning_rate": 3.3587908143311794e-05,
"loss": 3.0622,
"step": 9420
},
{
"epoch": 2.04,
"learning_rate": 3.3555066277994086e-05,
"loss": 3.1447,
"step": 9430
},
{
"epoch": 2.04,
"learning_rate": 3.3522207682126794e-05,
"loss": 3.1086,
"step": 9440
},
{
"epoch": 2.04,
"learning_rate": 3.3489332419969214e-05,
"loss": 3.1308,
"step": 9450
},
{
"epoch": 2.04,
"learning_rate": 3.345644055581319e-05,
"loss": 3.1152,
"step": 9460
},
{
"epoch": 2.04,
"learning_rate": 3.342353215398307e-05,
"loss": 3.1107,
"step": 9470
},
{
"epoch": 2.05,
"learning_rate": 3.339060727883554e-05,
"loss": 3.1278,
"step": 9480
},
{
"epoch": 2.05,
"learning_rate": 3.335766599475948e-05,
"loss": 3.1098,
"step": 9490
},
{
"epoch": 2.05,
"learning_rate": 3.332470836617589e-05,
"loss": 3.1251,
"step": 9500
},
{
"epoch": 2.05,
"learning_rate": 3.329173445753771e-05,
"loss": 3.0778,
"step": 9510
},
{
"epoch": 2.06,
"learning_rate": 3.325874433332972e-05,
"loss": 3.1043,
"step": 9520
},
{
"epoch": 2.06,
"learning_rate": 3.3225738058068425e-05,
"loss": 3.0784,
"step": 9530
},
{
"epoch": 2.06,
"learning_rate": 3.3192715696301895e-05,
"loss": 3.1185,
"step": 9540
},
{
"epoch": 2.06,
"learning_rate": 3.315967731260969e-05,
"loss": 3.0843,
"step": 9550
},
{
"epoch": 2.06,
"learning_rate": 3.312662297160267e-05,
"loss": 3.0896,
"step": 9560
},
{
"epoch": 2.07,
"learning_rate": 3.309355273792292e-05,
"loss": 3.1469,
"step": 9570
},
{
"epoch": 2.07,
"learning_rate": 3.306046667624361e-05,
"loss": 3.0886,
"step": 9580
},
{
"epoch": 2.07,
"learning_rate": 3.302736485126885e-05,
"loss": 3.0841,
"step": 9590
},
{
"epoch": 2.07,
"learning_rate": 3.2994247327733566e-05,
"loss": 3.1063,
"step": 9600
},
{
"epoch": 2.07,
"learning_rate": 3.2961114170403436e-05,
"loss": 3.0773,
"step": 9610
},
{
"epoch": 2.08,
"learning_rate": 3.2927965444074646e-05,
"loss": 3.0848,
"step": 9620
},
{
"epoch": 2.08,
"learning_rate": 3.289480121357388e-05,
"loss": 3.1147,
"step": 9630
},
{
"epoch": 2.08,
"learning_rate": 3.286162154375811e-05,
"loss": 3.1065,
"step": 9640
},
{
"epoch": 2.08,
"learning_rate": 3.282842649951451e-05,
"loss": 3.0612,
"step": 9650
},
{
"epoch": 2.09,
"learning_rate": 3.279521614576034e-05,
"loss": 3.1294,
"step": 9660
},
{
"epoch": 2.09,
"learning_rate": 3.276199054744279e-05,
"loss": 3.0764,
"step": 9670
},
{
"epoch": 2.09,
"learning_rate": 3.272874976953883e-05,
"loss": 3.1182,
"step": 9680
},
{
"epoch": 2.09,
"learning_rate": 3.269549387705517e-05,
"loss": 3.1077,
"step": 9690
},
{
"epoch": 2.09,
"learning_rate": 3.2662222935028036e-05,
"loss": 3.0921,
"step": 9700
},
{
"epoch": 2.1,
"learning_rate": 3.2628937008523106e-05,
"loss": 3.1092,
"step": 9710
},
{
"epoch": 2.1,
"learning_rate": 3.259563616263536e-05,
"loss": 3.0678,
"step": 9720
},
{
"epoch": 2.1,
"learning_rate": 3.256232046248895e-05,
"loss": 3.1179,
"step": 9730
},
{
"epoch": 2.1,
"learning_rate": 3.252898997323707e-05,
"loss": 3.1153,
"step": 9740
},
{
"epoch": 2.1,
"learning_rate": 3.249564476006187e-05,
"loss": 3.0629,
"step": 9750
},
{
"epoch": 2.11,
"learning_rate": 3.246228488817424e-05,
"loss": 3.1144,
"step": 9760
},
{
"epoch": 2.11,
"learning_rate": 3.2428910422813786e-05,
"loss": 3.0786,
"step": 9770
},
{
"epoch": 2.11,
"learning_rate": 3.239552142924862e-05,
"loss": 3.1056,
"step": 9780
},
{
"epoch": 2.11,
"learning_rate": 3.23621179727753e-05,
"loss": 3.1511,
"step": 9790
},
{
"epoch": 2.12,
"learning_rate": 3.232870011871863e-05,
"loss": 3.0994,
"step": 9800
},
{
"epoch": 2.12,
"learning_rate": 3.22952679324316e-05,
"loss": 3.1298,
"step": 9810
},
{
"epoch": 2.12,
"learning_rate": 3.2261821479295214e-05,
"loss": 3.1427,
"step": 9820
},
{
"epoch": 2.12,
"learning_rate": 3.222836082471838e-05,
"loss": 3.1254,
"step": 9830
},
{
"epoch": 2.12,
"learning_rate": 3.219488603413777e-05,
"loss": 3.0876,
"step": 9840
},
{
"epoch": 2.13,
"learning_rate": 3.2161397173017727e-05,
"loss": 3.0734,
"step": 9850
},
{
"epoch": 2.13,
"learning_rate": 3.2127894306850084e-05,
"loss": 3.0834,
"step": 9860
},
{
"epoch": 2.13,
"learning_rate": 3.209437750115407e-05,
"loss": 3.1251,
"step": 9870
},
{
"epoch": 2.13,
"learning_rate": 3.206084682147617e-05,
"loss": 3.109,
"step": 9880
},
{
"epoch": 2.14,
"learning_rate": 3.202730233339002e-05,
"loss": 3.1238,
"step": 9890
},
{
"epoch": 2.14,
"learning_rate": 3.199374410249625e-05,
"loss": 3.0921,
"step": 9900
},
{
"epoch": 2.14,
"learning_rate": 3.1960172194422355e-05,
"loss": 3.0811,
"step": 9910
},
{
"epoch": 2.14,
"learning_rate": 3.192658667482259e-05,
"loss": 3.1357,
"step": 9920
},
{
"epoch": 2.14,
"learning_rate": 3.189298760937782e-05,
"loss": 3.1182,
"step": 9930
},
{
"epoch": 2.15,
"learning_rate": 3.185937506379542e-05,
"loss": 3.1047,
"step": 9940
},
{
"epoch": 2.15,
"learning_rate": 3.18257491038091e-05,
"loss": 3.1334,
"step": 9950
},
{
"epoch": 2.15,
"learning_rate": 3.1792109795178825e-05,
"loss": 3.1351,
"step": 9960
},
{
"epoch": 2.15,
"learning_rate": 3.1758457203690655e-05,
"loss": 3.1448,
"step": 9970
},
{
"epoch": 2.15,
"learning_rate": 3.1724791395156625e-05,
"loss": 3.1141,
"step": 9980
},
{
"epoch": 2.16,
"learning_rate": 3.169111243541462e-05,
"loss": 3.1356,
"step": 9990
},
{
"epoch": 2.16,
"learning_rate": 3.165742039032825e-05,
"loss": 3.1419,
"step": 10000
},
{
"epoch": 2.16,
"learning_rate": 3.1623715325786715e-05,
"loss": 3.1144,
"step": 10010
},
{
"epoch": 2.16,
"learning_rate": 3.158999730770465e-05,
"loss": 3.1057,
"step": 10020
},
{
"epoch": 2.17,
"learning_rate": 3.155626640202207e-05,
"loss": 3.0705,
"step": 10030
},
{
"epoch": 2.17,
"learning_rate": 3.152252267470416e-05,
"loss": 3.1277,
"step": 10040
},
{
"epoch": 2.17,
"learning_rate": 3.148876619174117e-05,
"loss": 3.0692,
"step": 10050
},
{
"epoch": 2.17,
"learning_rate": 3.145499701914833e-05,
"loss": 3.1255,
"step": 10060
},
{
"epoch": 2.17,
"learning_rate": 3.142121522296566e-05,
"loss": 3.102,
"step": 10070
},
{
"epoch": 2.18,
"learning_rate": 3.138742086925788e-05,
"loss": 3.1029,
"step": 10080
},
{
"epoch": 2.18,
"learning_rate": 3.1353614024114244e-05,
"loss": 3.094,
"step": 10090
},
{
"epoch": 2.18,
"learning_rate": 3.131979475364848e-05,
"loss": 3.0889,
"step": 10100
},
{
"epoch": 2.18,
"learning_rate": 3.128596312399858e-05,
"loss": 3.1127,
"step": 10110
},
{
"epoch": 2.18,
"learning_rate": 3.1252119201326705e-05,
"loss": 3.1553,
"step": 10120
},
{
"epoch": 2.19,
"learning_rate": 3.121826305181909e-05,
"loss": 3.0632,
"step": 10130
},
{
"epoch": 2.19,
"learning_rate": 3.1184394741685816e-05,
"loss": 3.107,
"step": 10140
},
{
"epoch": 2.19,
"learning_rate": 3.115051433716083e-05,
"loss": 3.0981,
"step": 10150
},
{
"epoch": 2.19,
"learning_rate": 3.111662190450168e-05,
"loss": 3.1207,
"step": 10160
},
{
"epoch": 2.2,
"learning_rate": 3.1082717509989416e-05,
"loss": 3.0722,
"step": 10170
},
{
"epoch": 2.2,
"learning_rate": 3.104880121992855e-05,
"loss": 3.1075,
"step": 10180
},
{
"epoch": 2.2,
"learning_rate": 3.101487310064678e-05,
"loss": 3.096,
"step": 10190
},
{
"epoch": 2.2,
"learning_rate": 3.0980933218495005e-05,
"loss": 3.1503,
"step": 10200
},
{
"epoch": 2.2,
"learning_rate": 3.0946981639847084e-05,
"loss": 3.0735,
"step": 10210
},
{
"epoch": 2.21,
"learning_rate": 3.091301843109977e-05,
"loss": 3.0709,
"step": 10220
},
{
"epoch": 2.21,
"learning_rate": 3.087904365867254e-05,
"loss": 3.1344,
"step": 10230
},
{
"epoch": 2.21,
"learning_rate": 3.084505738900753e-05,
"loss": 3.1359,
"step": 10240
},
{
"epoch": 2.21,
"learning_rate": 3.0811059688569287e-05,
"loss": 3.0743,
"step": 10250
},
{
"epoch": 2.22,
"learning_rate": 3.077705062384479e-05,
"loss": 3.1118,
"step": 10260
},
{
"epoch": 2.22,
"learning_rate": 3.074303026134319e-05,
"loss": 3.138,
"step": 10270
},
{
"epoch": 2.22,
"learning_rate": 3.070899866759575e-05,
"loss": 3.0892,
"step": 10280
},
{
"epoch": 2.22,
"learning_rate": 3.06749559091557e-05,
"loss": 3.0744,
"step": 10290
},
{
"epoch": 2.22,
"learning_rate": 3.064090205259811e-05,
"loss": 3.1033,
"step": 10300
},
{
"epoch": 2.23,
"learning_rate": 3.060683716451973e-05,
"loss": 3.133,
"step": 10310
},
{
"epoch": 2.23,
"learning_rate": 3.0572761311538914e-05,
"loss": 3.1381,
"step": 10320
},
{
"epoch": 2.23,
"learning_rate": 3.0538674560295423e-05,
"loss": 3.0978,
"step": 10330
},
{
"epoch": 2.23,
"learning_rate": 3.0504576977450367e-05,
"loss": 3.0755,
"step": 10340
},
{
"epoch": 2.23,
"learning_rate": 3.0470468629686016e-05,
"loss": 3.1048,
"step": 10350
},
{
"epoch": 2.24,
"learning_rate": 3.0436349583705704e-05,
"loss": 3.159,
"step": 10360
},
{
"epoch": 2.24,
"learning_rate": 3.0402219906233676e-05,
"loss": 3.0511,
"step": 10370
},
{
"epoch": 2.24,
"learning_rate": 3.036807966401498e-05,
"loss": 3.1379,
"step": 10380
},
{
"epoch": 2.24,
"learning_rate": 3.0333928923815326e-05,
"loss": 3.1011,
"step": 10390
},
{
"epoch": 2.25,
"learning_rate": 3.0299767752420926e-05,
"loss": 3.1319,
"step": 10400
},
{
"epoch": 2.25,
"learning_rate": 3.026559621663843e-05,
"loss": 3.1408,
"step": 10410
},
{
"epoch": 2.25,
"learning_rate": 3.0231414383294736e-05,
"loss": 3.092,
"step": 10420
},
{
"epoch": 2.25,
"learning_rate": 3.019722231923689e-05,
"loss": 3.1126,
"step": 10430
},
{
"epoch": 2.25,
"learning_rate": 3.0163020091331928e-05,
"loss": 3.1085,
"step": 10440
},
{
"epoch": 2.26,
"learning_rate": 3.0128807766466776e-05,
"loss": 3.0773,
"step": 10450
},
{
"epoch": 2.26,
"learning_rate": 3.009458541154811e-05,
"loss": 3.13,
"step": 10460
},
{
"epoch": 2.26,
"learning_rate": 3.0060353093502215e-05,
"loss": 3.1377,
"step": 10470
},
{
"epoch": 2.26,
"learning_rate": 3.0026110879274854e-05,
"loss": 3.1241,
"step": 10480
},
{
"epoch": 2.26,
"learning_rate": 2.9991858835831155e-05,
"loss": 3.0861,
"step": 10490
},
{
"epoch": 2.27,
"learning_rate": 2.9957597030155455e-05,
"loss": 3.1324,
"step": 10500
},
{
"epoch": 2.27,
"learning_rate": 2.99233255292512e-05,
"loss": 3.0698,
"step": 10510
},
{
"epoch": 2.27,
"learning_rate": 2.9889044400140785e-05,
"loss": 3.0967,
"step": 10520
},
{
"epoch": 2.27,
"learning_rate": 2.9854753709865434e-05,
"loss": 3.0755,
"step": 10530
},
{
"epoch": 2.28,
"learning_rate": 2.982045352548507e-05,
"loss": 3.12,
"step": 10540
},
{
"epoch": 2.28,
"learning_rate": 2.9786143914078184e-05,
"loss": 3.0749,
"step": 10550
},
{
"epoch": 2.28,
"learning_rate": 2.9751824942741708e-05,
"loss": 3.1039,
"step": 10560
},
{
"epoch": 2.28,
"learning_rate": 2.9717496678590868e-05,
"loss": 3.129,
"step": 10570
},
{
"epoch": 2.28,
"learning_rate": 2.9683159188759065e-05,
"loss": 3.0406,
"step": 10580
},
{
"epoch": 2.29,
"learning_rate": 2.9648812540397746e-05,
"loss": 3.0813,
"step": 10590
},
{
"epoch": 2.29,
"learning_rate": 2.9614456800676276e-05,
"loss": 3.1081,
"step": 10600
},
{
"epoch": 2.29,
"learning_rate": 2.9580092036781792e-05,
"loss": 3.0899,
"step": 10610
},
{
"epoch": 2.29,
"learning_rate": 2.9545718315919074e-05,
"loss": 3.1271,
"step": 10620
},
{
"epoch": 2.29,
"learning_rate": 2.9511335705310416e-05,
"loss": 3.1272,
"step": 10630
},
{
"epoch": 2.3,
"learning_rate": 2.9476944272195518e-05,
"loss": 3.1155,
"step": 10640
},
{
"epoch": 2.3,
"learning_rate": 2.9442544083831307e-05,
"loss": 3.1526,
"step": 10650
},
{
"epoch": 2.3,
"learning_rate": 2.9408135207491853e-05,
"loss": 3.1156,
"step": 10660
},
{
"epoch": 2.3,
"learning_rate": 2.9373717710468213e-05,
"loss": 3.0691,
"step": 10670
},
{
"epoch": 2.31,
"learning_rate": 2.9339291660068284e-05,
"loss": 3.1182,
"step": 10680
},
{
"epoch": 2.31,
"learning_rate": 2.9304857123616714e-05,
"loss": 3.0618,
"step": 10690
},
{
"epoch": 2.31,
"learning_rate": 2.927041416845473e-05,
"loss": 3.1102,
"step": 10700
},
{
"epoch": 2.31,
"learning_rate": 2.9235962861940035e-05,
"loss": 3.13,
"step": 10710
},
{
"epoch": 2.31,
"learning_rate": 2.9201503271446666e-05,
"loss": 3.0638,
"step": 10720
},
{
"epoch": 2.32,
"learning_rate": 2.916703546436484e-05,
"loss": 3.1512,
"step": 10730
},
{
"epoch": 2.32,
"learning_rate": 2.9132559508100866e-05,
"loss": 3.1324,
"step": 10740
},
{
"epoch": 2.32,
"learning_rate": 2.909807547007697e-05,
"loss": 3.1095,
"step": 10750
},
{
"epoch": 2.32,
"learning_rate": 2.90635834177312e-05,
"loss": 3.0785,
"step": 10760
},
{
"epoch": 2.33,
"learning_rate": 2.9029083418517268e-05,
"loss": 3.0682,
"step": 10770
},
{
"epoch": 2.33,
"learning_rate": 2.899457553990442e-05,
"loss": 3.0904,
"step": 10780
},
{
"epoch": 2.33,
"learning_rate": 2.896005984937734e-05,
"loss": 3.1123,
"step": 10790
},
{
"epoch": 2.33,
"learning_rate": 2.892553641443595e-05,
"loss": 3.063,
"step": 10800
},
{
"epoch": 2.33,
"learning_rate": 2.8891005302595346e-05,
"loss": 3.1219,
"step": 10810
},
{
"epoch": 2.34,
"learning_rate": 2.8856466581385628e-05,
"loss": 3.1406,
"step": 10820
},
{
"epoch": 2.34,
"learning_rate": 2.8821920318351774e-05,
"loss": 3.1197,
"step": 10830
},
{
"epoch": 2.34,
"learning_rate": 2.878736658105352e-05,
"loss": 3.1293,
"step": 10840
},
{
"epoch": 2.34,
"learning_rate": 2.87528054370652e-05,
"loss": 3.0861,
"step": 10850
},
{
"epoch": 2.34,
"learning_rate": 2.8718236953975652e-05,
"loss": 3.0714,
"step": 10860
},
{
"epoch": 2.35,
"learning_rate": 2.8683661199388064e-05,
"loss": 3.063,
"step": 10870
},
{
"epoch": 2.35,
"learning_rate": 2.864907824091984e-05,
"loss": 3.1496,
"step": 10880
},
{
"epoch": 2.35,
"learning_rate": 2.8614488146202466e-05,
"loss": 3.0788,
"step": 10890
},
{
"epoch": 2.35,
"learning_rate": 2.8579890982881396e-05,
"loss": 3.0164,
"step": 10900
},
{
"epoch": 2.36,
"learning_rate": 2.8545286818615897e-05,
"loss": 3.1023,
"step": 10910
},
{
"epoch": 2.36,
"learning_rate": 2.8510675721078937e-05,
"loss": 3.0453,
"step": 10920
},
{
"epoch": 2.36,
"learning_rate": 2.847605775795704e-05,
"loss": 3.1209,
"step": 10930
},
{
"epoch": 2.36,
"learning_rate": 2.844143299695015e-05,
"loss": 3.0609,
"step": 10940
},
{
"epoch": 2.36,
"learning_rate": 2.8406801505771514e-05,
"loss": 3.0713,
"step": 10950
},
{
"epoch": 2.37,
"learning_rate": 2.837216335214753e-05,
"loss": 3.0678,
"step": 10960
},
{
"epoch": 2.37,
"learning_rate": 2.8337518603817635e-05,
"loss": 3.0978,
"step": 10970
},
{
"epoch": 2.37,
"learning_rate": 2.8302867328534166e-05,
"loss": 3.0728,
"step": 10980
},
{
"epoch": 2.37,
"learning_rate": 2.826820959406221e-05,
"loss": 3.0968,
"step": 10990
},
{
"epoch": 2.37,
"learning_rate": 2.8233545468179494e-05,
"loss": 3.0653,
"step": 11000
},
{
"epoch": 2.38,
"learning_rate": 2.8198875018676247e-05,
"loss": 3.0835,
"step": 11010
},
{
"epoch": 2.38,
"learning_rate": 2.816419831335506e-05,
"loss": 3.0784,
"step": 11020
},
{
"epoch": 2.38,
"learning_rate": 2.812951542003076e-05,
"loss": 3.0441,
"step": 11030
},
{
"epoch": 2.38,
"learning_rate": 2.8094826406530277e-05,
"loss": 3.079,
"step": 11040
},
{
"epoch": 2.39,
"learning_rate": 2.8060131340692515e-05,
"loss": 3.1258,
"step": 11050
},
{
"epoch": 2.39,
"learning_rate": 2.8025430290368186e-05,
"loss": 3.0687,
"step": 11060
},
{
"epoch": 2.39,
"learning_rate": 2.799072332341975e-05,
"loss": 3.0717,
"step": 11070
},
{
"epoch": 2.39,
"learning_rate": 2.7956010507721193e-05,
"loss": 3.0986,
"step": 11080
},
{
"epoch": 2.39,
"learning_rate": 2.7921291911157975e-05,
"loss": 3.1202,
"step": 11090
},
{
"epoch": 2.4,
"learning_rate": 2.788656760162685e-05,
"loss": 3.0424,
"step": 11100
},
{
"epoch": 2.4,
"learning_rate": 2.7851837647035727e-05,
"loss": 3.0636,
"step": 11110
},
{
"epoch": 2.4,
"learning_rate": 2.7817102115303577e-05,
"loss": 3.0761,
"step": 11120
},
{
"epoch": 2.4,
"learning_rate": 2.778236107436027e-05,
"loss": 3.0874,
"step": 11130
},
{
"epoch": 2.41,
"learning_rate": 2.774761459214645e-05,
"loss": 3.0725,
"step": 11140
},
{
"epoch": 2.41,
"learning_rate": 2.7712862736613393e-05,
"loss": 3.095,
"step": 11150
},
{
"epoch": 2.41,
"learning_rate": 2.7678105575722903e-05,
"loss": 3.1073,
"step": 11160
},
{
"epoch": 2.41,
"learning_rate": 2.764334317744714e-05,
"loss": 3.1232,
"step": 11170
},
{
"epoch": 2.41,
"learning_rate": 2.7608575609768516e-05,
"loss": 3.1249,
"step": 11180
},
{
"epoch": 2.42,
"learning_rate": 2.7573802940679554e-05,
"loss": 3.1031,
"step": 11190
},
{
"epoch": 2.42,
"learning_rate": 2.7539025238182755e-05,
"loss": 3.1128,
"step": 11200
},
{
"epoch": 2.42,
"learning_rate": 2.750424257029044e-05,
"loss": 3.086,
"step": 11210
},
{
"epoch": 2.42,
"learning_rate": 2.746945500502468e-05,
"loss": 3.0999,
"step": 11220
},
{
"epoch": 2.42,
"learning_rate": 2.74346626104171e-05,
"loss": 3.1157,
"step": 11230
},
{
"epoch": 2.43,
"learning_rate": 2.739986545450876e-05,
"loss": 3.0799,
"step": 11240
},
{
"epoch": 2.43,
"learning_rate": 2.7365063605350055e-05,
"loss": 3.1216,
"step": 11250
},
{
"epoch": 2.43,
"learning_rate": 2.733025713100054e-05,
"loss": 3.102,
"step": 11260
},
{
"epoch": 2.43,
"learning_rate": 2.7295446099528833e-05,
"loss": 3.1082,
"step": 11270
},
{
"epoch": 2.44,
"learning_rate": 2.7260630579012437e-05,
"loss": 3.0696,
"step": 11280
},
{
"epoch": 2.44,
"learning_rate": 2.7225810637537657e-05,
"loss": 3.1085,
"step": 11290
},
{
"epoch": 2.44,
"learning_rate": 2.7190986343199444e-05,
"loss": 3.0841,
"step": 11300
},
{
"epoch": 2.44,
"learning_rate": 2.7156157764101237e-05,
"loss": 3.0917,
"step": 11310
},
{
"epoch": 2.44,
"learning_rate": 2.7121324968354896e-05,
"loss": 3.0829,
"step": 11320
},
{
"epoch": 2.45,
"learning_rate": 2.7086488024080482e-05,
"loss": 3.0723,
"step": 11330
},
{
"epoch": 2.45,
"learning_rate": 2.7051646999406198e-05,
"loss": 3.0931,
"step": 11340
},
{
"epoch": 2.45,
"learning_rate": 2.7016801962468218e-05,
"loss": 3.0761,
"step": 11350
},
{
"epoch": 2.45,
"learning_rate": 2.698195298141057e-05,
"loss": 3.1442,
"step": 11360
},
{
"epoch": 2.45,
"learning_rate": 2.6947100124384977e-05,
"loss": 3.0613,
"step": 11370
},
{
"epoch": 2.46,
"learning_rate": 2.6912243459550763e-05,
"loss": 3.121,
"step": 11380
},
{
"epoch": 2.46,
"learning_rate": 2.6877383055074683e-05,
"loss": 3.1307,
"step": 11390
},
{
"epoch": 2.46,
"learning_rate": 2.6842518979130814e-05,
"loss": 3.1062,
"step": 11400
},
{
"epoch": 2.46,
"learning_rate": 2.680765129990041e-05,
"loss": 3.1192,
"step": 11410
},
{
"epoch": 2.47,
"learning_rate": 2.677278008557177e-05,
"loss": 3.125,
"step": 11420
},
{
"epoch": 2.47,
"learning_rate": 2.673790540434011e-05,
"loss": 3.1473,
"step": 11430
},
{
"epoch": 2.47,
"learning_rate": 2.6703027324407427e-05,
"loss": 3.0819,
"step": 11440
},
{
"epoch": 2.47,
"learning_rate": 2.6668145913982356e-05,
"loss": 3.0896,
"step": 11450
},
{
"epoch": 2.47,
"learning_rate": 2.663326124128006e-05,
"loss": 3.1016,
"step": 11460
},
{
"epoch": 2.48,
"learning_rate": 2.6598373374522067e-05,
"loss": 3.1391,
"step": 11470
},
{
"epoch": 2.48,
"learning_rate": 2.656348238193616e-05,
"loss": 3.1086,
"step": 11480
},
{
"epoch": 2.48,
"learning_rate": 2.652858833175623e-05,
"loss": 3.1175,
"step": 11490
},
{
"epoch": 2.48,
"learning_rate": 2.6493691292222154e-05,
"loss": 3.0815,
"step": 11500
},
{
"epoch": 2.48,
"learning_rate": 2.6458791331579653e-05,
"loss": 3.1265,
"step": 11510
},
{
"epoch": 2.49,
"learning_rate": 2.6423888518080143e-05,
"loss": 3.116,
"step": 11520
},
{
"epoch": 2.49,
"learning_rate": 2.6388982919980653e-05,
"loss": 3.0215,
"step": 11530
},
{
"epoch": 2.49,
"learning_rate": 2.635407460554363e-05,
"loss": 3.0756,
"step": 11540
},
{
"epoch": 2.49,
"learning_rate": 2.631916364303685e-05,
"loss": 3.0693,
"step": 11550
},
{
"epoch": 2.5,
"learning_rate": 2.6284250100733253e-05,
"loss": 3.0939,
"step": 11560
},
{
"epoch": 2.5,
"learning_rate": 2.624933404691083e-05,
"loss": 3.0734,
"step": 11570
},
{
"epoch": 2.5,
"learning_rate": 2.6214415549852493e-05,
"loss": 3.0926,
"step": 11580
},
{
"epoch": 2.5,
"learning_rate": 2.617949467784592e-05,
"loss": 3.1195,
"step": 11590
},
{
"epoch": 2.5,
"learning_rate": 2.614457149918344e-05,
"loss": 3.051,
"step": 11600
},
{
"epoch": 2.51,
"learning_rate": 2.6109646082161888e-05,
"loss": 3.1115,
"step": 11610
},
{
"epoch": 2.51,
"learning_rate": 2.6074718495082472e-05,
"loss": 3.0824,
"step": 11620
},
{
"epoch": 2.51,
"learning_rate": 2.6039788806250664e-05,
"loss": 3.0901,
"step": 11630
},
{
"epoch": 2.51,
"learning_rate": 2.600485708397603e-05,
"loss": 3.0843,
"step": 11640
},
{
"epoch": 2.52,
"learning_rate": 2.596992339657211e-05,
"loss": 3.0616,
"step": 11650
},
{
"epoch": 2.52,
"learning_rate": 2.59349878123563e-05,
"loss": 3.1046,
"step": 11660
},
{
"epoch": 2.52,
"learning_rate": 2.590005039964969e-05,
"loss": 3.0518,
"step": 11670
},
{
"epoch": 2.52,
"learning_rate": 2.5865111226776955e-05,
"loss": 3.1193,
"step": 11680
},
{
"epoch": 2.52,
"learning_rate": 2.583017036206622e-05,
"loss": 3.1094,
"step": 11690
},
{
"epoch": 2.53,
"learning_rate": 2.57952278738489e-05,
"loss": 3.0654,
"step": 11700
},
{
"epoch": 2.53,
"learning_rate": 2.5760283830459604e-05,
"loss": 3.1159,
"step": 11710
},
{
"epoch": 2.53,
"learning_rate": 2.5725338300235964e-05,
"loss": 3.0928,
"step": 11720
},
{
"epoch": 2.53,
"learning_rate": 2.5690391351518527e-05,
"loss": 3.0995,
"step": 11730
},
{
"epoch": 2.53,
"learning_rate": 2.5655443052650636e-05,
"loss": 3.0705,
"step": 11740
},
{
"epoch": 2.54,
"learning_rate": 2.5620493471978234e-05,
"loss": 3.0478,
"step": 11750
},
{
"epoch": 2.54,
"learning_rate": 2.55855426778498e-05,
"loss": 3.1046,
"step": 11760
},
{
"epoch": 2.54,
"learning_rate": 2.5550590738616177e-05,
"loss": 3.0884,
"step": 11770
},
{
"epoch": 2.54,
"learning_rate": 2.5515637722630443e-05,
"loss": 3.0307,
"step": 11780
},
{
"epoch": 2.55,
"learning_rate": 2.5480683698247793e-05,
"loss": 3.0585,
"step": 11790
},
{
"epoch": 2.55,
"learning_rate": 2.5445728733825374e-05,
"loss": 3.0698,
"step": 11800
},
{
"epoch": 2.55,
"learning_rate": 2.54107728977222e-05,
"loss": 3.1397,
"step": 11810
},
{
"epoch": 2.55,
"learning_rate": 2.5375816258298973e-05,
"loss": 3.0161,
"step": 11820
},
{
"epoch": 2.55,
"learning_rate": 2.534085888391796e-05,
"loss": 3.0402,
"step": 11830
},
{
"epoch": 2.56,
"learning_rate": 2.530590084294287e-05,
"loss": 3.0921,
"step": 11840
},
{
"epoch": 2.56,
"learning_rate": 2.5270942203738736e-05,
"loss": 3.1017,
"step": 11850
},
{
"epoch": 2.56,
"learning_rate": 2.523598303467173e-05,
"loss": 3.1229,
"step": 11860
},
{
"epoch": 2.56,
"learning_rate": 2.520102340410907e-05,
"loss": 3.1025,
"step": 11870
},
{
"epoch": 2.56,
"learning_rate": 2.5166063380418887e-05,
"loss": 3.0991,
"step": 11880
},
{
"epoch": 2.57,
"learning_rate": 2.513110303197008e-05,
"loss": 3.0159,
"step": 11890
},
{
"epoch": 2.57,
"learning_rate": 2.509614242713216e-05,
"loss": 3.0764,
"step": 11900
},
{
"epoch": 2.57,
"learning_rate": 2.5061181634275165e-05,
"loss": 3.0662,
"step": 11910
},
{
"epoch": 2.57,
"learning_rate": 2.50262207217695e-05,
"loss": 3.0829,
"step": 11920
},
{
"epoch": 2.58,
"learning_rate": 2.4991259757985783e-05,
"loss": 3.1125,
"step": 11930
},
{
"epoch": 2.58,
"learning_rate": 2.4956298811294755e-05,
"loss": 3.0865,
"step": 11940
},
{
"epoch": 2.58,
"learning_rate": 2.4921337950067105e-05,
"loss": 3.0786,
"step": 11950
},
{
"epoch": 2.58,
"learning_rate": 2.4886377242673374e-05,
"loss": 3.0926,
"step": 11960
},
{
"epoch": 2.58,
"learning_rate": 2.485141675748378e-05,
"loss": 3.0696,
"step": 11970
},
{
"epoch": 2.59,
"learning_rate": 2.481645656286812e-05,
"loss": 3.123,
"step": 11980
},
{
"epoch": 2.59,
"learning_rate": 2.4781496727195633e-05,
"loss": 3.1018,
"step": 11990
},
{
"epoch": 2.59,
"learning_rate": 2.474653731883484e-05,
"loss": 3.0686,
"step": 12000
},
{
"epoch": 2.59,
"learning_rate": 2.4711578406153425e-05,
"loss": 3.0781,
"step": 12010
},
{
"epoch": 2.59,
"learning_rate": 2.4676620057518113e-05,
"loss": 3.1127,
"step": 12020
},
{
"epoch": 2.6,
"learning_rate": 2.4641662341294515e-05,
"loss": 3.0634,
"step": 12030
},
{
"epoch": 2.6,
"learning_rate": 2.460670532584702e-05,
"loss": 3.0725,
"step": 12040
},
{
"epoch": 2.6,
"learning_rate": 2.4571749079538628e-05,
"loss": 3.1118,
"step": 12050
},
{
"epoch": 2.6,
"learning_rate": 2.453679367073085e-05,
"loss": 3.13,
"step": 12060
},
{
"epoch": 2.61,
"learning_rate": 2.4501839167783552e-05,
"loss": 3.0759,
"step": 12070
},
{
"epoch": 2.61,
"learning_rate": 2.4466885639054836e-05,
"loss": 3.0375,
"step": 12080
},
{
"epoch": 2.61,
"learning_rate": 2.4431933152900885e-05,
"loss": 3.048,
"step": 12090
},
{
"epoch": 2.61,
"learning_rate": 2.439698177767586e-05,
"loss": 3.125,
"step": 12100
},
{
"epoch": 2.61,
"learning_rate": 2.436203158173173e-05,
"loss": 3.0834,
"step": 12110
},
{
"epoch": 2.62,
"learning_rate": 2.4327082633418177e-05,
"loss": 3.0619,
"step": 12120
},
{
"epoch": 2.62,
"learning_rate": 2.4292135001082433e-05,
"loss": 3.0853,
"step": 12130
},
{
"epoch": 2.62,
"learning_rate": 2.4257188753069156e-05,
"loss": 3.0798,
"step": 12140
},
{
"epoch": 2.62,
"learning_rate": 2.4222243957720293e-05,
"loss": 3.0659,
"step": 12150
},
{
"epoch": 2.63,
"learning_rate": 2.418730068337497e-05,
"loss": 3.0525,
"step": 12160
},
{
"epoch": 2.63,
"learning_rate": 2.4152358998369313e-05,
"loss": 3.1012,
"step": 12170
},
{
"epoch": 2.63,
"learning_rate": 2.4117418971036355e-05,
"loss": 3.0754,
"step": 12180
},
{
"epoch": 2.63,
"learning_rate": 2.4082480669705872e-05,
"loss": 3.0462,
"step": 12190
},
{
"epoch": 2.63,
"learning_rate": 2.4047544162704296e-05,
"loss": 3.0427,
"step": 12200
},
{
"epoch": 2.64,
"learning_rate": 2.4012609518354517e-05,
"loss": 3.0675,
"step": 12210
},
{
"epoch": 2.64,
"learning_rate": 2.3977676804975803e-05,
"loss": 3.1119,
"step": 12220
},
{
"epoch": 2.64,
"learning_rate": 2.3942746090883633e-05,
"loss": 3.0648,
"step": 12230
},
{
"epoch": 2.64,
"learning_rate": 2.3907817444389583e-05,
"loss": 3.0725,
"step": 12240
},
{
"epoch": 2.64,
"learning_rate": 2.3872890933801182e-05,
"loss": 3.0316,
"step": 12250
},
{
"epoch": 2.65,
"learning_rate": 2.3837966627421785e-05,
"loss": 3.0828,
"step": 12260
},
{
"epoch": 2.65,
"learning_rate": 2.380304459355043e-05,
"loss": 3.0851,
"step": 12270
},
{
"epoch": 2.65,
"learning_rate": 2.376812490048172e-05,
"loss": 3.0935,
"step": 12280
},
{
"epoch": 2.65,
"learning_rate": 2.3733207616505678e-05,
"loss": 3.0782,
"step": 12290
},
{
"epoch": 2.66,
"learning_rate": 2.3698292809907606e-05,
"loss": 3.0609,
"step": 12300
},
{
"epoch": 2.66,
"learning_rate": 2.3663380548967985e-05,
"loss": 3.0507,
"step": 12310
},
{
"epoch": 2.66,
"learning_rate": 2.3628470901962282e-05,
"loss": 3.1248,
"step": 12320
},
{
"epoch": 2.66,
"learning_rate": 2.3593563937160888e-05,
"loss": 3.0939,
"step": 12330
},
{
"epoch": 2.66,
"learning_rate": 2.3558659722828918e-05,
"loss": 3.0961,
"step": 12340
},
{
"epoch": 2.67,
"learning_rate": 2.3523758327226133e-05,
"loss": 3.0794,
"step": 12350
},
{
"epoch": 2.67,
"learning_rate": 2.3488859818606768e-05,
"loss": 3.0917,
"step": 12360
},
{
"epoch": 2.67,
"learning_rate": 2.3453964265219413e-05,
"loss": 3.0319,
"step": 12370
},
{
"epoch": 2.67,
"learning_rate": 2.3419071735306884e-05,
"loss": 3.124,
"step": 12380
},
{
"epoch": 2.67,
"learning_rate": 2.3384182297106062e-05,
"loss": 3.1025,
"step": 12390
},
{
"epoch": 2.68,
"learning_rate": 2.3349296018847834e-05,
"loss": 3.0926,
"step": 12400
},
{
"epoch": 2.68,
"learning_rate": 2.3314412968756855e-05,
"loss": 3.0543,
"step": 12410
},
{
"epoch": 2.68,
"learning_rate": 2.327953321505149e-05,
"loss": 3.0743,
"step": 12420
},
{
"epoch": 2.68,
"learning_rate": 2.3244656825943654e-05,
"loss": 3.0518,
"step": 12430
},
{
"epoch": 2.69,
"learning_rate": 2.3209783869638677e-05,
"loss": 3.0705,
"step": 12440
},
{
"epoch": 2.69,
"learning_rate": 2.317491441433518e-05,
"loss": 3.0462,
"step": 12450
},
{
"epoch": 2.69,
"learning_rate": 2.3140048528224945e-05,
"loss": 3.0676,
"step": 12460
},
{
"epoch": 2.69,
"learning_rate": 2.3105186279492757e-05,
"loss": 3.0791,
"step": 12470
},
{
"epoch": 2.69,
"learning_rate": 2.3070327736316304e-05,
"loss": 3.1268,
"step": 12480
},
{
"epoch": 2.7,
"learning_rate": 2.303547296686602e-05,
"loss": 3.0877,
"step": 12490
},
{
"epoch": 2.7,
"learning_rate": 2.3000622039304956e-05,
"loss": 3.0766,
"step": 12500
},
{
"epoch": 2.7,
"learning_rate": 2.2965775021788653e-05,
"loss": 3.132,
"step": 12510
},
{
"epoch": 2.7,
"learning_rate": 2.2930931982465004e-05,
"loss": 3.0841,
"step": 12520
},
{
"epoch": 2.71,
"learning_rate": 2.2896092989474132e-05,
"loss": 3.0925,
"step": 12530
},
{
"epoch": 2.71,
"learning_rate": 2.2861258110948237e-05,
"loss": 3.0371,
"step": 12540
},
{
"epoch": 2.71,
"learning_rate": 2.2826427415011466e-05,
"loss": 3.0535,
"step": 12550
},
{
"epoch": 2.71,
"learning_rate": 2.2791600969779796e-05,
"loss": 3.0623,
"step": 12560
},
{
"epoch": 2.71,
"learning_rate": 2.27567788433609e-05,
"loss": 3.0631,
"step": 12570
},
{
"epoch": 2.72,
"learning_rate": 2.2721961103853985e-05,
"loss": 3.0896,
"step": 12580
},
{
"epoch": 2.72,
"learning_rate": 2.2687147819349688e-05,
"loss": 3.1099,
"step": 12590
},
{
"epoch": 2.72,
"learning_rate": 2.265233905792993e-05,
"loss": 3.0572,
"step": 12600
},
{
"epoch": 2.72,
"learning_rate": 2.2617534887667806e-05,
"loss": 3.1104,
"step": 12610
},
{
"epoch": 2.72,
"learning_rate": 2.258273537662741e-05,
"loss": 3.0959,
"step": 12620
},
{
"epoch": 2.73,
"learning_rate": 2.2547940592863728e-05,
"loss": 3.1369,
"step": 12630
},
{
"epoch": 2.73,
"learning_rate": 2.251315060442251e-05,
"loss": 3.0734,
"step": 12640
},
{
"epoch": 2.73,
"learning_rate": 2.2478365479340118e-05,
"loss": 3.1181,
"step": 12650
},
{
"epoch": 2.73,
"learning_rate": 2.2443585285643412e-05,
"loss": 3.1178,
"step": 12660
},
{
"epoch": 2.74,
"learning_rate": 2.24088100913496e-05,
"loss": 3.0586,
"step": 12670
},
{
"epoch": 2.74,
"learning_rate": 2.2374039964466113e-05,
"loss": 3.1085,
"step": 12680
},
{
"epoch": 2.74,
"learning_rate": 2.2339274972990487e-05,
"loss": 3.0578,
"step": 12690
},
{
"epoch": 2.74,
"learning_rate": 2.2304515184910197e-05,
"loss": 3.0263,
"step": 12700
},
{
"epoch": 2.74,
"learning_rate": 2.226976066820255e-05,
"loss": 3.0977,
"step": 12710
},
{
"epoch": 2.75,
"learning_rate": 2.223501149083454e-05,
"loss": 3.0817,
"step": 12720
},
{
"epoch": 2.75,
"learning_rate": 2.2200267720762728e-05,
"loss": 3.0511,
"step": 12730
},
{
"epoch": 2.75,
"learning_rate": 2.216552942593309e-05,
"loss": 3.0776,
"step": 12740
},
{
"epoch": 2.75,
"learning_rate": 2.2130796674280893e-05,
"loss": 3.0923,
"step": 12750
},
{
"epoch": 2.75,
"learning_rate": 2.2096069533730587e-05,
"loss": 3.0393,
"step": 12760
},
{
"epoch": 2.76,
"learning_rate": 2.206134807219562e-05,
"loss": 3.0755,
"step": 12770
},
{
"epoch": 2.76,
"learning_rate": 2.2026632357578348e-05,
"loss": 3.1093,
"step": 12780
},
{
"epoch": 2.76,
"learning_rate": 2.1991922457769885e-05,
"loss": 3.0742,
"step": 12790
},
{
"epoch": 2.76,
"learning_rate": 2.1957218440649964e-05,
"loss": 3.0783,
"step": 12800
},
{
"epoch": 2.77,
"learning_rate": 2.192252037408684e-05,
"loss": 3.0407,
"step": 12810
},
{
"epoch": 2.77,
"learning_rate": 2.188782832593711e-05,
"loss": 3.1184,
"step": 12820
},
{
"epoch": 2.77,
"learning_rate": 2.18531423640456e-05,
"loss": 3.1023,
"step": 12830
},
{
"epoch": 2.77,
"learning_rate": 2.1818462556245246e-05,
"loss": 3.0721,
"step": 12840
},
{
"epoch": 2.77,
"learning_rate": 2.178378897035694e-05,
"loss": 3.1031,
"step": 12850
},
{
"epoch": 2.78,
"learning_rate": 2.1749121674189405e-05,
"loss": 3.0602,
"step": 12860
},
{
"epoch": 2.78,
"learning_rate": 2.171446073553907e-05,
"loss": 3.0958,
"step": 12870
},
{
"epoch": 2.78,
"learning_rate": 2.1679806222189924e-05,
"loss": 3.0477,
"step": 12880
},
{
"epoch": 2.78,
"learning_rate": 2.1645158201913402e-05,
"loss": 3.0522,
"step": 12890
},
{
"epoch": 2.78,
"learning_rate": 2.1610516742468227e-05,
"loss": 3.0632,
"step": 12900
},
{
"epoch": 2.79,
"learning_rate": 2.15758819116003e-05,
"loss": 3.0794,
"step": 12910
},
{
"epoch": 2.79,
"learning_rate": 2.1541253777042552e-05,
"loss": 3.0774,
"step": 12920
},
{
"epoch": 2.79,
"learning_rate": 2.150663240651483e-05,
"loss": 3.1031,
"step": 12930
},
{
"epoch": 2.79,
"learning_rate": 2.1472017867723747e-05,
"loss": 3.0277,
"step": 12940
},
{
"epoch": 2.8,
"learning_rate": 2.143741022836255e-05,
"loss": 3.0626,
"step": 12950
},
{
"epoch": 2.8,
"learning_rate": 2.1402809556110997e-05,
"loss": 3.049,
"step": 12960
},
{
"epoch": 2.8,
"learning_rate": 2.136821591863522e-05,
"loss": 3.0808,
"step": 12970
},
{
"epoch": 2.8,
"learning_rate": 2.1333629383587613e-05,
"loss": 3.069,
"step": 12980
},
{
"epoch": 2.8,
"learning_rate": 2.1299050018606648e-05,
"loss": 3.0232,
"step": 12990
},
{
"epoch": 2.81,
"learning_rate": 2.1264477891316792e-05,
"loss": 3.0622,
"step": 13000
},
{
"epoch": 2.81,
"learning_rate": 2.1229913069328353e-05,
"loss": 3.0584,
"step": 13010
},
{
"epoch": 2.81,
"learning_rate": 2.1195355620237366e-05,
"loss": 3.0836,
"step": 13020
},
{
"epoch": 2.81,
"learning_rate": 2.1160805611625425e-05,
"loss": 3.0443,
"step": 13030
},
{
"epoch": 2.82,
"learning_rate": 2.1126263111059586e-05,
"loss": 3.0655,
"step": 13040
},
{
"epoch": 2.82,
"learning_rate": 2.109172818609223e-05,
"loss": 3.0734,
"step": 13050
},
{
"epoch": 2.82,
"learning_rate": 2.105720090426091e-05,
"loss": 3.114,
"step": 13060
},
{
"epoch": 2.82,
"learning_rate": 2.102268133308823e-05,
"loss": 3.0608,
"step": 13070
},
{
"epoch": 2.82,
"learning_rate": 2.0988169540081728e-05,
"loss": 3.0571,
"step": 13080
},
{
"epoch": 2.83,
"learning_rate": 2.095366559273372e-05,
"loss": 3.0478,
"step": 13090
},
{
"epoch": 2.83,
"learning_rate": 2.091916955852118e-05,
"loss": 3.0968,
"step": 13100
},
{
"epoch": 2.83,
"learning_rate": 2.0884681504905608e-05,
"loss": 3.0641,
"step": 13110
},
{
"epoch": 2.83,
"learning_rate": 2.0850201499332904e-05,
"loss": 3.0924,
"step": 13120
},
{
"epoch": 2.83,
"learning_rate": 2.0815729609233215e-05,
"loss": 3.1327,
"step": 13130
},
{
"epoch": 2.84,
"learning_rate": 2.078126590202083e-05,
"loss": 3.0996,
"step": 13140
},
{
"epoch": 2.84,
"learning_rate": 2.0746810445094028e-05,
"loss": 3.117,
"step": 13150
},
{
"epoch": 2.84,
"learning_rate": 2.0712363305834955e-05,
"loss": 3.0755,
"step": 13160
},
{
"epoch": 2.84,
"learning_rate": 2.0677924551609495e-05,
"loss": 3.083,
"step": 13170
},
{
"epoch": 2.85,
"learning_rate": 2.0643494249767126e-05,
"loss": 3.0577,
"step": 13180
},
{
"epoch": 2.85,
"learning_rate": 2.0609072467640804e-05,
"loss": 3.1007,
"step": 13190
},
{
"epoch": 2.85,
"learning_rate": 2.0574659272546812e-05,
"loss": 2.9806,
"step": 13200
},
{
"epoch": 2.85,
"learning_rate": 2.0540254731784656e-05,
"loss": 3.086,
"step": 13210
},
{
"epoch": 2.85,
"learning_rate": 2.050585891263691e-05,
"loss": 3.0449,
"step": 13220
},
{
"epoch": 2.86,
"learning_rate": 2.047147188236909e-05,
"loss": 3.0903,
"step": 13230
},
{
"epoch": 2.86,
"learning_rate": 2.0437093708229528e-05,
"loss": 3.0774,
"step": 13240
},
{
"epoch": 2.86,
"learning_rate": 2.040272445744923e-05,
"loss": 3.0449,
"step": 13250
},
{
"epoch": 2.86,
"learning_rate": 2.0368364197241753e-05,
"loss": 3.0955,
"step": 13260
},
{
"epoch": 2.86,
"learning_rate": 2.0334012994803074e-05,
"loss": 3.0442,
"step": 13270
},
{
"epoch": 2.87,
"learning_rate": 2.0299670917311463e-05,
"loss": 3.058,
"step": 13280
},
{
"epoch": 2.87,
"learning_rate": 2.0265338031927336e-05,
"loss": 3.0525,
"step": 13290
},
{
"epoch": 2.87,
"learning_rate": 2.0231014405793134e-05,
"loss": 3.0448,
"step": 13300
},
{
"epoch": 2.87,
"learning_rate": 2.019670010603319e-05,
"loss": 3.0906,
"step": 13310
},
{
"epoch": 2.88,
"learning_rate": 2.0162395199753596e-05,
"loss": 3.0079,
"step": 13320
},
{
"epoch": 2.88,
"learning_rate": 2.0128099754042088e-05,
"loss": 3.0783,
"step": 13330
},
{
"epoch": 2.88,
"learning_rate": 2.0093813835967878e-05,
"loss": 3.0897,
"step": 13340
},
{
"epoch": 2.88,
"learning_rate": 2.0059537512581566e-05,
"loss": 3.0362,
"step": 13350
},
{
"epoch": 2.88,
"learning_rate": 2.0025270850914975e-05,
"loss": 3.0994,
"step": 13360
},
{
"epoch": 2.89,
"learning_rate": 1.9991013917981038e-05,
"loss": 3.0839,
"step": 13370
},
{
"epoch": 2.89,
"learning_rate": 1.9956766780773665e-05,
"loss": 3.0551,
"step": 13380
},
{
"epoch": 2.89,
"learning_rate": 1.9922529506267602e-05,
"loss": 3.0812,
"step": 13390
},
{
"epoch": 2.89,
"learning_rate": 1.9888302161418313e-05,
"loss": 3.1256,
"step": 13400
},
{
"epoch": 2.9,
"learning_rate": 1.985408481316184e-05,
"loss": 3.1231,
"step": 13410
},
{
"epoch": 2.9,
"learning_rate": 1.981987752841467e-05,
"loss": 3.066,
"step": 13420
},
{
"epoch": 2.9,
"learning_rate": 1.978568037407364e-05,
"loss": 3.0784,
"step": 13430
},
{
"epoch": 2.9,
"learning_rate": 1.9751493417015736e-05,
"loss": 3.0503,
"step": 13440
},
{
"epoch": 2.9,
"learning_rate": 1.9717316724098016e-05,
"loss": 3.0933,
"step": 13450
},
{
"epoch": 2.91,
"learning_rate": 1.9683150362157476e-05,
"loss": 3.0458,
"step": 13460
},
{
"epoch": 2.91,
"learning_rate": 1.9648994398010893e-05,
"loss": 3.0642,
"step": 13470
},
{
"epoch": 2.91,
"learning_rate": 1.9614848898454717e-05,
"loss": 3.1345,
"step": 13480
},
{
"epoch": 2.91,
"learning_rate": 1.958071393026493e-05,
"loss": 3.0581,
"step": 13490
},
{
"epoch": 2.91,
"learning_rate": 1.9546589560196925e-05,
"loss": 3.0417,
"step": 13500
},
{
"epoch": 2.92,
"learning_rate": 1.951247585498537e-05,
"loss": 3.0763,
"step": 13510
},
{
"epoch": 2.92,
"learning_rate": 1.9478372881344063e-05,
"loss": 3.0669,
"step": 13520
},
{
"epoch": 2.92,
"learning_rate": 1.944428070596583e-05,
"loss": 2.9967,
"step": 13530
},
{
"epoch": 2.92,
"learning_rate": 1.9410199395522367e-05,
"loss": 3.075,
"step": 13540
},
{
"epoch": 2.93,
"learning_rate": 1.9376129016664128e-05,
"loss": 3.0736,
"step": 13550
},
{
"epoch": 2.93,
"learning_rate": 1.93420696360202e-05,
"loss": 3.0856,
"step": 13560
},
{
"epoch": 2.93,
"learning_rate": 1.9308021320198135e-05,
"loss": 3.1077,
"step": 13570
},
{
"epoch": 2.93,
"learning_rate": 1.9273984135783872e-05,
"loss": 3.0321,
"step": 13580
},
{
"epoch": 2.93,
"learning_rate": 1.9239958149341572e-05,
"loss": 3.1347,
"step": 13590
},
{
"epoch": 2.94,
"learning_rate": 1.9205943427413492e-05,
"loss": 3.0587,
"step": 13600
},
{
"epoch": 2.94,
"learning_rate": 1.9171940036519864e-05,
"loss": 3.0254,
"step": 13610
},
{
"epoch": 2.94,
"learning_rate": 1.913794804315876e-05,
"loss": 3.1113,
"step": 13620
},
{
"epoch": 2.94,
"learning_rate": 1.9103967513805956e-05,
"loss": 3.1004,
"step": 13630
},
{
"epoch": 2.94,
"learning_rate": 1.9069998514914832e-05,
"loss": 3.0155,
"step": 13640
},
{
"epoch": 2.95,
"learning_rate": 1.9036041112916198e-05,
"loss": 3.1082,
"step": 13650
},
{
"epoch": 2.95,
"learning_rate": 1.9002095374218186e-05,
"loss": 3.0399,
"step": 13660
},
{
"epoch": 2.95,
"learning_rate": 1.8968161365206115e-05,
"loss": 3.1204,
"step": 13670
},
{
"epoch": 2.95,
"learning_rate": 1.8934239152242384e-05,
"loss": 3.1119,
"step": 13680
},
{
"epoch": 2.96,
"learning_rate": 1.8900328801666306e-05,
"loss": 3.1011,
"step": 13690
},
{
"epoch": 2.96,
"learning_rate": 1.8866430379794e-05,
"loss": 3.0728,
"step": 13700
},
{
"epoch": 2.96,
"learning_rate": 1.8832543952918256e-05,
"loss": 3.1189,
"step": 13710
},
{
"epoch": 2.96,
"learning_rate": 1.8798669587308416e-05,
"loss": 3.0881,
"step": 13720
},
{
"epoch": 2.96,
"learning_rate": 1.8764807349210213e-05,
"loss": 3.0758,
"step": 13730
},
{
"epoch": 2.97,
"learning_rate": 1.873095730484569e-05,
"loss": 3.1332,
"step": 13740
},
{
"epoch": 2.97,
"learning_rate": 1.869711952041303e-05,
"loss": 3.0424,
"step": 13750
},
{
"epoch": 2.97,
"learning_rate": 1.8663294062086432e-05,
"loss": 3.0782,
"step": 13760
},
{
"epoch": 2.97,
"learning_rate": 1.8629480996016e-05,
"loss": 3.017,
"step": 13770
},
{
"epoch": 2.97,
"learning_rate": 1.859568038832761e-05,
"loss": 3.0482,
"step": 13780
},
{
"epoch": 2.98,
"learning_rate": 1.856189230512276e-05,
"loss": 3.0514,
"step": 13790
},
{
"epoch": 2.98,
"learning_rate": 1.852811681247845e-05,
"loss": 3.1268,
"step": 13800
},
{
"epoch": 2.98,
"learning_rate": 1.849435397644708e-05,
"loss": 3.0204,
"step": 13810
},
{
"epoch": 2.98,
"learning_rate": 1.8460603863056285e-05,
"loss": 3.0401,
"step": 13820
},
{
"epoch": 2.99,
"learning_rate": 1.8426866538308803e-05,
"loss": 3.0728,
"step": 13830
},
{
"epoch": 2.99,
"learning_rate": 1.839314206818241e-05,
"loss": 3.0712,
"step": 13840
},
{
"epoch": 2.99,
"learning_rate": 1.8359430518629696e-05,
"loss": 3.02,
"step": 13850
},
{
"epoch": 2.99,
"learning_rate": 1.8325731955577995e-05,
"loss": 3.0477,
"step": 13860
},
{
"epoch": 2.99,
"learning_rate": 1.8292046444929256e-05,
"loss": 3.0242,
"step": 13870
},
{
"epoch": 3.0,
"learning_rate": 1.8258374052559895e-05,
"loss": 3.1099,
"step": 13880
},
{
"epoch": 3.0,
"learning_rate": 1.8224714844320673e-05,
"loss": 3.0553,
"step": 13890
},
{
"epoch": 3.0,
"eval_loss": 3.0703344345092773,
"eval_runtime": 191.332,
"eval_samples_per_second": 774.674,
"eval_steps_per_second": 24.209,
"step": 13896
},
{
"epoch": 3.0,
"learning_rate": 1.819106888603656e-05,
"loss": 3.0542,
"step": 13900
},
{
"epoch": 3.0,
"learning_rate": 1.8157436243506636e-05,
"loss": 3.0391,
"step": 13910
},
{
"epoch": 3.01,
"learning_rate": 1.812381698250392e-05,
"loss": 3.0663,
"step": 13920
},
{
"epoch": 3.01,
"learning_rate": 1.8090211168775264e-05,
"loss": 3.0202,
"step": 13930
},
{
"epoch": 3.01,
"learning_rate": 1.8056618868041233e-05,
"loss": 3.0616,
"step": 13940
},
{
"epoch": 3.01,
"learning_rate": 1.802304014599595e-05,
"loss": 3.0781,
"step": 13950
},
{
"epoch": 3.01,
"learning_rate": 1.7989475068307003e-05,
"loss": 3.0343,
"step": 13960
},
{
"epoch": 3.02,
"learning_rate": 1.7955923700615284e-05,
"loss": 3.0459,
"step": 13970
},
{
"epoch": 3.02,
"learning_rate": 1.7922386108534873e-05,
"loss": 3.0434,
"step": 13980
},
{
"epoch": 3.02,
"learning_rate": 1.788886235765291e-05,
"loss": 3.0426,
"step": 13990
},
{
"epoch": 3.02,
"learning_rate": 1.7855352513529466e-05,
"loss": 3.0538,
"step": 14000
},
{
"epoch": 3.02,
"learning_rate": 1.7821856641697425e-05,
"loss": 3.0079,
"step": 14010
},
{
"epoch": 3.03,
"learning_rate": 1.778837480766234e-05,
"loss": 3.0746,
"step": 14020
},
{
"epoch": 3.03,
"learning_rate": 1.7754907076902305e-05,
"loss": 3.0435,
"step": 14030
},
{
"epoch": 3.03,
"learning_rate": 1.772145351486783e-05,
"loss": 3.0989,
"step": 14040
},
{
"epoch": 3.03,
"learning_rate": 1.768801418698175e-05,
"loss": 3.0364,
"step": 14050
},
{
"epoch": 3.04,
"learning_rate": 1.7654589158639024e-05,
"loss": 3.0943,
"step": 14060
},
{
"epoch": 3.04,
"learning_rate": 1.7621178495206665e-05,
"loss": 3.0778,
"step": 14070
},
{
"epoch": 3.04,
"learning_rate": 1.7587782262023583e-05,
"loss": 3.1174,
"step": 14080
},
{
"epoch": 3.04,
"learning_rate": 1.7554400524400482e-05,
"loss": 3.0757,
"step": 14090
},
{
"epoch": 3.04,
"learning_rate": 1.7521033347619707e-05,
"loss": 3.0477,
"step": 14100
},
{
"epoch": 3.05,
"learning_rate": 1.748768079693513e-05,
"loss": 3.0232,
"step": 14110
},
{
"epoch": 3.05,
"learning_rate": 1.7454342937572016e-05,
"loss": 3.0792,
"step": 14120
},
{
"epoch": 3.05,
"learning_rate": 1.7421019834726914e-05,
"loss": 3.0128,
"step": 14130
},
{
"epoch": 3.05,
"learning_rate": 1.7387711553567496e-05,
"loss": 3.0611,
"step": 14140
},
{
"epoch": 3.05,
"learning_rate": 1.735441815923246e-05,
"loss": 3.0558,
"step": 14150
},
{
"epoch": 3.06,
"learning_rate": 1.7321139716831385e-05,
"loss": 3.094,
"step": 14160
},
{
"epoch": 3.06,
"learning_rate": 1.7287876291444615e-05,
"loss": 3.1006,
"step": 14170
},
{
"epoch": 3.06,
"learning_rate": 1.725462794812312e-05,
"loss": 3.0843,
"step": 14180
},
{
"epoch": 3.06,
"learning_rate": 1.722139475188838e-05,
"loss": 3.0466,
"step": 14190
},
{
"epoch": 3.07,
"learning_rate": 1.7188176767732252e-05,
"loss": 3.0243,
"step": 14200
},
{
"epoch": 3.07,
"learning_rate": 1.7154974060616845e-05,
"loss": 3.0615,
"step": 14210
},
{
"epoch": 3.07,
"learning_rate": 1.7121786695474383e-05,
"loss": 3.0615,
"step": 14220
},
{
"epoch": 3.07,
"learning_rate": 1.7088614737207105e-05,
"loss": 3.0557,
"step": 14230
},
{
"epoch": 3.07,
"learning_rate": 1.705545825068709e-05,
"loss": 3.0453,
"step": 14240
},
{
"epoch": 3.08,
"learning_rate": 1.702231730075619e-05,
"loss": 3.0437,
"step": 14250
},
{
"epoch": 3.08,
"learning_rate": 1.6989191952225863e-05,
"loss": 3.084,
"step": 14260
},
{
"epoch": 3.08,
"learning_rate": 1.6956082269877056e-05,
"loss": 3.0663,
"step": 14270
},
{
"epoch": 3.08,
"learning_rate": 1.6922988318460076e-05,
"loss": 3.0339,
"step": 14280
},
{
"epoch": 3.09,
"learning_rate": 1.6889910162694463e-05,
"loss": 3.06,
"step": 14290
},
{
"epoch": 3.09,
"learning_rate": 1.6856847867268876e-05,
"loss": 3.0486,
"step": 14300
},
{
"epoch": 3.09,
"learning_rate": 1.682380149684095e-05,
"loss": 3.0859,
"step": 14310
},
{
"epoch": 3.09,
"learning_rate": 1.679077111603718e-05,
"loss": 3.0324,
"step": 14320
},
{
"epoch": 3.09,
"learning_rate": 1.675775678945279e-05,
"loss": 3.078,
"step": 14330
},
{
"epoch": 3.1,
"learning_rate": 1.6724758581651607e-05,
"loss": 3.0725,
"step": 14340
},
{
"epoch": 3.1,
"learning_rate": 1.6691776557165932e-05,
"loss": 3.0497,
"step": 14350
},
{
"epoch": 3.1,
"learning_rate": 1.6658810780496437e-05,
"loss": 3.0521,
"step": 14360
},
{
"epoch": 3.1,
"learning_rate": 1.662586131611199e-05,
"loss": 3.0912,
"step": 14370
},
{
"epoch": 3.1,
"learning_rate": 1.6592928228449578e-05,
"loss": 3.0547,
"step": 14380
},
{
"epoch": 3.11,
"learning_rate": 1.6560011581914153e-05,
"loss": 3.022,
"step": 14390
},
{
"epoch": 3.11,
"learning_rate": 1.6527111440878518e-05,
"loss": 3.093,
"step": 14400
},
{
"epoch": 3.11,
"learning_rate": 1.6494227869683194e-05,
"loss": 3.0573,
"step": 14410
},
{
"epoch": 3.11,
"learning_rate": 1.6461360932636308e-05,
"loss": 3.0709,
"step": 14420
},
{
"epoch": 3.12,
"learning_rate": 1.6428510694013444e-05,
"loss": 3.0223,
"step": 14430
},
{
"epoch": 3.12,
"learning_rate": 1.6395677218057533e-05,
"loss": 3.0011,
"step": 14440
},
{
"epoch": 3.12,
"learning_rate": 1.6362860568978715e-05,
"loss": 3.0707,
"step": 14450
},
{
"epoch": 3.12,
"learning_rate": 1.633006081095426e-05,
"loss": 3.0621,
"step": 14460
},
{
"epoch": 3.12,
"learning_rate": 1.6297278008128362e-05,
"loss": 3.0078,
"step": 14470
},
{
"epoch": 3.13,
"learning_rate": 1.626451222461207e-05,
"loss": 3.0806,
"step": 14480
},
{
"epoch": 3.13,
"learning_rate": 1.6231763524483165e-05,
"loss": 3.0782,
"step": 14490
},
{
"epoch": 3.13,
"learning_rate": 1.6199031971786006e-05,
"loss": 3.0366,
"step": 14500
},
{
"epoch": 3.13,
"learning_rate": 1.6166317630531412e-05,
"loss": 3.0531,
"step": 14510
},
{
"epoch": 3.13,
"learning_rate": 1.613362056469656e-05,
"loss": 3.0323,
"step": 14520
},
{
"epoch": 3.14,
"learning_rate": 1.6100940838224828e-05,
"loss": 3.0446,
"step": 14530
},
{
"epoch": 3.14,
"learning_rate": 1.6068278515025688e-05,
"loss": 3.061,
"step": 14540
},
{
"epoch": 3.14,
"learning_rate": 1.6035633658974584e-05,
"loss": 3.0469,
"step": 14550
},
{
"epoch": 3.14,
"learning_rate": 1.600300633391279e-05,
"loss": 3.0177,
"step": 14560
},
{
"epoch": 3.15,
"learning_rate": 1.5970396603647308e-05,
"loss": 3.0747,
"step": 14570
},
{
"epoch": 3.15,
"learning_rate": 1.5937804531950724e-05,
"loss": 3.0483,
"step": 14580
},
{
"epoch": 3.15,
"learning_rate": 1.590523018256109e-05,
"loss": 3.0961,
"step": 14590
},
{
"epoch": 3.15,
"learning_rate": 1.58726736191818e-05,
"loss": 3.0801,
"step": 14600
},
{
"epoch": 3.15,
"learning_rate": 1.5840134905481467e-05,
"loss": 3.0833,
"step": 14610
},
{
"epoch": 3.16,
"learning_rate": 1.58076141050938e-05,
"loss": 3.0564,
"step": 14620
},
{
"epoch": 3.16,
"learning_rate": 1.5775111281617463e-05,
"loss": 3.0564,
"step": 14630
},
{
"epoch": 3.16,
"learning_rate": 1.5742626498615975e-05,
"loss": 3.0466,
"step": 14640
},
{
"epoch": 3.16,
"learning_rate": 1.5710159819617576e-05,
"loss": 3.0252,
"step": 14650
},
{
"epoch": 3.16,
"learning_rate": 1.5677711308115106e-05,
"loss": 3.0641,
"step": 14660
},
{
"epoch": 3.17,
"learning_rate": 1.5645281027565856e-05,
"loss": 3.0575,
"step": 14670
},
{
"epoch": 3.17,
"learning_rate": 1.5612869041391477e-05,
"loss": 3.0546,
"step": 14680
},
{
"epoch": 3.17,
"learning_rate": 1.5580475412977845e-05,
"loss": 3.0695,
"step": 14690
},
{
"epoch": 3.17,
"learning_rate": 1.5548100205674932e-05,
"loss": 3.0535,
"step": 14700
},
{
"epoch": 3.18,
"learning_rate": 1.5515743482796673e-05,
"loss": 3.0548,
"step": 14710
},
{
"epoch": 3.18,
"learning_rate": 1.5483405307620884e-05,
"loss": 3.0137,
"step": 14720
},
{
"epoch": 3.18,
"learning_rate": 1.5451085743389082e-05,
"loss": 3.0788,
"step": 14730
},
{
"epoch": 3.18,
"learning_rate": 1.5418784853306397e-05,
"loss": 3.0569,
"step": 14740
},
{
"epoch": 3.18,
"learning_rate": 1.538650270054144e-05,
"loss": 3.0883,
"step": 14750
},
{
"epoch": 3.19,
"learning_rate": 1.5354239348226174e-05,
"loss": 3.0535,
"step": 14760
},
{
"epoch": 3.19,
"learning_rate": 1.53219948594558e-05,
"loss": 3.0504,
"step": 14770
},
{
"epoch": 3.19,
"learning_rate": 1.528976929728863e-05,
"loss": 3.0356,
"step": 14780
},
{
"epoch": 3.19,
"learning_rate": 1.5257562724745957e-05,
"loss": 3.0523,
"step": 14790
},
{
"epoch": 3.2,
"learning_rate": 1.5225375204811943e-05,
"loss": 3.0525,
"step": 14800
},
{
"epoch": 3.2,
"learning_rate": 1.5193206800433487e-05,
"loss": 3.075,
"step": 14810
},
{
"epoch": 3.2,
"learning_rate": 1.5161057574520104e-05,
"loss": 3.0255,
"step": 14820
},
{
"epoch": 3.2,
"learning_rate": 1.5128927589943808e-05,
"loss": 3.0516,
"step": 14830
},
{
"epoch": 3.2,
"learning_rate": 1.5096816909538974e-05,
"loss": 3.0168,
"step": 14840
},
{
"epoch": 3.21,
"learning_rate": 1.5064725596102242e-05,
"loss": 3.047,
"step": 14850
},
{
"epoch": 3.21,
"learning_rate": 1.5032653712392346e-05,
"loss": 3.0618,
"step": 14860
},
{
"epoch": 3.21,
"learning_rate": 1.5000601321130076e-05,
"loss": 3.0317,
"step": 14870
},
{
"epoch": 3.21,
"learning_rate": 1.4968568484998047e-05,
"loss": 3.0799,
"step": 14880
},
{
"epoch": 3.21,
"learning_rate": 1.4936555266640665e-05,
"loss": 3.0745,
"step": 14890
},
{
"epoch": 3.22,
"learning_rate": 1.4904561728663952e-05,
"loss": 3.0677,
"step": 14900
},
{
"epoch": 3.22,
"learning_rate": 1.4872587933635458e-05,
"loss": 3.0482,
"step": 14910
},
{
"epoch": 3.22,
"learning_rate": 1.4840633944084109e-05,
"loss": 3.0413,
"step": 14920
},
{
"epoch": 3.22,
"learning_rate": 1.4808699822500105e-05,
"loss": 3.0743,
"step": 14930
},
{
"epoch": 3.23,
"learning_rate": 1.4776785631334799e-05,
"loss": 3.0688,
"step": 14940
},
{
"epoch": 3.23,
"learning_rate": 1.4744891433000558e-05,
"loss": 3.0695,
"step": 14950
},
{
"epoch": 3.23,
"learning_rate": 1.4713017289870647e-05,
"loss": 3.0509,
"step": 14960
},
{
"epoch": 3.23,
"learning_rate": 1.4681163264279124e-05,
"loss": 3.0831,
"step": 14970
},
{
"epoch": 3.23,
"learning_rate": 1.4649329418520697e-05,
"loss": 3.0686,
"step": 14980
},
{
"epoch": 3.24,
"learning_rate": 1.4617515814850603e-05,
"loss": 3.0389,
"step": 14990
},
{
"epoch": 3.24,
"learning_rate": 1.45857225154845e-05,
"loss": 3.0766,
"step": 15000
},
{
"epoch": 3.24,
"learning_rate": 1.4553949582598345e-05,
"loss": 3.0647,
"step": 15010
},
{
"epoch": 3.24,
"learning_rate": 1.4522197078328253e-05,
"loss": 3.0675,
"step": 15020
},
{
"epoch": 3.24,
"learning_rate": 1.4490465064770392e-05,
"loss": 3.0314,
"step": 15030
},
{
"epoch": 3.25,
"learning_rate": 1.4458753603980866e-05,
"loss": 3.0655,
"step": 15040
},
{
"epoch": 3.25,
"learning_rate": 1.4427062757975573e-05,
"loss": 3.0644,
"step": 15050
},
{
"epoch": 3.25,
"learning_rate": 1.4395392588730095e-05,
"loss": 3.0548,
"step": 15060
},
{
"epoch": 3.25,
"learning_rate": 1.4363743158179598e-05,
"loss": 3.0627,
"step": 15070
},
{
"epoch": 3.26,
"learning_rate": 1.433211452821868e-05,
"loss": 3.078,
"step": 15080
},
{
"epoch": 3.26,
"learning_rate": 1.4300506760701248e-05,
"loss": 3.0849,
"step": 15090
},
{
"epoch": 3.26,
"learning_rate": 1.4268919917440423e-05,
"loss": 3.0851,
"step": 15100
},
{
"epoch": 3.26,
"learning_rate": 1.4237354060208402e-05,
"loss": 2.9985,
"step": 15110
},
{
"epoch": 3.26,
"learning_rate": 1.4205809250736347e-05,
"loss": 3.0449,
"step": 15120
},
{
"epoch": 3.27,
"learning_rate": 1.4174285550714247e-05,
"loss": 3.0482,
"step": 15130
},
{
"epoch": 3.27,
"learning_rate": 1.4142783021790817e-05,
"loss": 3.0242,
"step": 15140
},
{
"epoch": 3.27,
"learning_rate": 1.4111301725573367e-05,
"loss": 3.0998,
"step": 15150
},
{
"epoch": 3.27,
"learning_rate": 1.4079841723627688e-05,
"loss": 3.0741,
"step": 15160
},
{
"epoch": 3.28,
"learning_rate": 1.4048403077477918e-05,
"loss": 3.052,
"step": 15170
},
{
"epoch": 3.28,
"learning_rate": 1.4016985848606435e-05,
"loss": 3.0273,
"step": 15180
},
{
"epoch": 3.28,
"learning_rate": 1.3985590098453738e-05,
"loss": 3.0439,
"step": 15190
},
{
"epoch": 3.28,
"learning_rate": 1.3954215888418318e-05,
"loss": 3.0551,
"step": 15200
},
{
"epoch": 3.28,
"learning_rate": 1.3922863279856535e-05,
"loss": 3.1111,
"step": 15210
},
{
"epoch": 3.29,
"learning_rate": 1.3891532334082518e-05,
"loss": 3.0981,
"step": 15220
},
{
"epoch": 3.29,
"learning_rate": 1.386022311236802e-05,
"loss": 3.0954,
"step": 15230
},
{
"epoch": 3.29,
"learning_rate": 1.38289356759423e-05,
"loss": 3.0783,
"step": 15240
},
{
"epoch": 3.29,
"learning_rate": 1.3797670085992053e-05,
"loss": 3.0643,
"step": 15250
},
{
"epoch": 3.29,
"learning_rate": 1.3766426403661215e-05,
"loss": 3.0291,
"step": 15260
},
{
"epoch": 3.3,
"learning_rate": 1.3735204690050879e-05,
"loss": 3.0613,
"step": 15270
},
{
"epoch": 3.3,
"learning_rate": 1.3704005006219189e-05,
"loss": 3.0073,
"step": 15280
},
{
"epoch": 3.3,
"learning_rate": 1.3672827413181207e-05,
"loss": 3.0438,
"step": 15290
},
{
"epoch": 3.3,
"learning_rate": 1.3641671971908781e-05,
"loss": 3.05,
"step": 15300
},
{
"epoch": 3.31,
"learning_rate": 1.3610538743330443e-05,
"loss": 3.0315,
"step": 15310
},
{
"epoch": 3.31,
"learning_rate": 1.35794277883313e-05,
"loss": 3.0834,
"step": 15320
},
{
"epoch": 3.31,
"learning_rate": 1.3548339167752888e-05,
"loss": 3.0867,
"step": 15330
},
{
"epoch": 3.31,
"learning_rate": 1.3517272942393055e-05,
"loss": 3.0431,
"step": 15340
},
{
"epoch": 3.31,
"learning_rate": 1.348622917300587e-05,
"loss": 3.1059,
"step": 15350
},
{
"epoch": 3.32,
"learning_rate": 1.3455207920301477e-05,
"loss": 3.0624,
"step": 15360
},
{
"epoch": 3.32,
"learning_rate": 1.3424209244945984e-05,
"loss": 3.0538,
"step": 15370
},
{
"epoch": 3.32,
"learning_rate": 1.3393233207561356e-05,
"loss": 3.0448,
"step": 15380
},
{
"epoch": 3.32,
"learning_rate": 1.3362279868725278e-05,
"loss": 3.0791,
"step": 15390
},
{
"epoch": 3.32,
"learning_rate": 1.3331349288971046e-05,
"loss": 3.0532,
"step": 15400
},
{
"epoch": 3.33,
"learning_rate": 1.3300441528787449e-05,
"loss": 3.062,
"step": 15410
},
{
"epoch": 3.33,
"learning_rate": 1.3269556648618648e-05,
"loss": 3.0507,
"step": 15420
},
{
"epoch": 3.33,
"learning_rate": 1.3238694708864063e-05,
"loss": 3.0964,
"step": 15430
},
{
"epoch": 3.33,
"learning_rate": 1.3207855769878247e-05,
"loss": 3.0408,
"step": 15440
},
{
"epoch": 3.34,
"learning_rate": 1.3177039891970777e-05,
"loss": 3.0065,
"step": 15450
},
{
"epoch": 3.34,
"learning_rate": 1.314624713540612e-05,
"loss": 3.0178,
"step": 15460
},
{
"epoch": 3.34,
"learning_rate": 1.3115477560403532e-05,
"loss": 3.0697,
"step": 15470
},
{
"epoch": 3.34,
"learning_rate": 1.3084731227136948e-05,
"loss": 3.089,
"step": 15480
},
{
"epoch": 3.34,
"learning_rate": 1.3054008195734834e-05,
"loss": 3.0363,
"step": 15490
},
{
"epoch": 3.35,
"learning_rate": 1.3023308526280093e-05,
"loss": 3.0394,
"step": 15500
},
{
"epoch": 3.35,
"learning_rate": 1.2992632278809933e-05,
"loss": 3.0578,
"step": 15510
},
{
"epoch": 3.35,
"learning_rate": 1.2961979513315764e-05,
"loss": 3.0156,
"step": 15520
},
{
"epoch": 3.35,
"learning_rate": 1.2931350289743077e-05,
"loss": 3.0096,
"step": 15530
},
{
"epoch": 3.35,
"learning_rate": 1.2900744667991316e-05,
"loss": 3.0001,
"step": 15540
},
{
"epoch": 3.36,
"learning_rate": 1.287016270791377e-05,
"loss": 3.0078,
"step": 15550
},
{
"epoch": 3.36,
"learning_rate": 1.2839604469317462e-05,
"loss": 3.0303,
"step": 15560
},
{
"epoch": 3.36,
"learning_rate": 1.2809070011963014e-05,
"loss": 3.0857,
"step": 15570
},
{
"epoch": 3.36,
"learning_rate": 1.2778559395564548e-05,
"loss": 3.0598,
"step": 15580
},
{
"epoch": 3.37,
"learning_rate": 1.2748072679789564e-05,
"loss": 3.0147,
"step": 15590
},
{
"epoch": 3.37,
"learning_rate": 1.2717609924258811e-05,
"loss": 3.0968,
"step": 15600
},
{
"epoch": 3.37,
"learning_rate": 1.2687171188546187e-05,
"loss": 3.015,
"step": 15610
},
{
"epoch": 3.37,
"learning_rate": 1.2656756532178615e-05,
"loss": 3.0335,
"step": 15620
},
{
"epoch": 3.37,
"learning_rate": 1.2626366014635932e-05,
"loss": 3.0618,
"step": 15630
},
{
"epoch": 3.38,
"learning_rate": 1.2595999695350766e-05,
"loss": 3.0203,
"step": 15640
},
{
"epoch": 3.38,
"learning_rate": 1.2565657633708416e-05,
"loss": 3.0543,
"step": 15650
},
{
"epoch": 3.38,
"learning_rate": 1.2535339889046749e-05,
"loss": 3.0578,
"step": 15660
},
{
"epoch": 3.38,
"learning_rate": 1.2505046520656073e-05,
"loss": 3.0704,
"step": 15670
},
{
"epoch": 3.39,
"learning_rate": 1.2474777587779018e-05,
"loss": 3.0626,
"step": 15680
},
{
"epoch": 3.39,
"learning_rate": 1.2444533149610457e-05,
"loss": 3.0391,
"step": 15690
},
{
"epoch": 3.39,
"learning_rate": 1.2414313265297329e-05,
"loss": 3.0568,
"step": 15700
},
{
"epoch": 3.39,
"learning_rate": 1.2384117993938566e-05,
"loss": 3.0478,
"step": 15710
},
{
"epoch": 3.39,
"learning_rate": 1.2353947394584961e-05,
"loss": 3.0834,
"step": 15720
},
{
"epoch": 3.4,
"learning_rate": 1.2323801526239068e-05,
"loss": 3.0699,
"step": 15730
},
{
"epoch": 3.4,
"learning_rate": 1.2293680447855067e-05,
"loss": 3.0144,
"step": 15740
},
{
"epoch": 3.4,
"learning_rate": 1.2263584218338658e-05,
"loss": 3.027,
"step": 15750
},
{
"epoch": 3.4,
"learning_rate": 1.2233512896546944e-05,
"loss": 3.0298,
"step": 15760
},
{
"epoch": 3.4,
"learning_rate": 1.2203466541288344e-05,
"loss": 3.0119,
"step": 15770
},
{
"epoch": 3.41,
"learning_rate": 1.2173445211322415e-05,
"loss": 3.0436,
"step": 15780
},
{
"epoch": 3.41,
"learning_rate": 1.2143448965359793e-05,
"loss": 3.0477,
"step": 15790
},
{
"epoch": 3.41,
"learning_rate": 1.2113477862062053e-05,
"loss": 3.0715,
"step": 15800
},
{
"epoch": 3.41,
"learning_rate": 1.2083531960041605e-05,
"loss": 3.0144,
"step": 15810
},
{
"epoch": 3.42,
"learning_rate": 1.2053611317861568e-05,
"loss": 3.0531,
"step": 15820
},
{
"epoch": 3.42,
"learning_rate": 1.202371599403567e-05,
"loss": 3.0482,
"step": 15830
},
{
"epoch": 3.42,
"learning_rate": 1.1993846047028117e-05,
"loss": 3.0403,
"step": 15840
},
{
"epoch": 3.42,
"learning_rate": 1.1964001535253496e-05,
"loss": 3.065,
"step": 15850
},
{
"epoch": 3.42,
"learning_rate": 1.193418251707665e-05,
"loss": 3.0052,
"step": 15860
},
{
"epoch": 3.43,
"learning_rate": 1.1904389050812558e-05,
"loss": 3.0018,
"step": 15870
},
{
"epoch": 3.43,
"learning_rate": 1.187462119472623e-05,
"loss": 3.0484,
"step": 15880
},
{
"epoch": 3.43,
"learning_rate": 1.1844879007032613e-05,
"loss": 3.0394,
"step": 15890
},
{
"epoch": 3.43,
"learning_rate": 1.1815162545896435e-05,
"loss": 3.0637,
"step": 15900
},
{
"epoch": 3.43,
"learning_rate": 1.178547186943211e-05,
"loss": 3.0781,
"step": 15910
},
{
"epoch": 3.44,
"learning_rate": 1.1755807035703643e-05,
"loss": 3.0903,
"step": 15920
},
{
"epoch": 3.44,
"learning_rate": 1.1726168102724484e-05,
"loss": 3.0391,
"step": 15930
},
{
"epoch": 3.44,
"learning_rate": 1.1696555128457437e-05,
"loss": 3.0797,
"step": 15940
},
{
"epoch": 3.44,
"learning_rate": 1.1666968170814549e-05,
"loss": 3.0513,
"step": 15950
},
{
"epoch": 3.45,
"learning_rate": 1.1637407287656974e-05,
"loss": 3.0669,
"step": 15960
},
{
"epoch": 3.45,
"learning_rate": 1.1607872536794883e-05,
"loss": 3.0438,
"step": 15970
},
{
"epoch": 3.45,
"learning_rate": 1.1578363975987338e-05,
"loss": 3.086,
"step": 15980
},
{
"epoch": 3.45,
"learning_rate": 1.1548881662942185e-05,
"loss": 3.0371,
"step": 15990
},
{
"epoch": 3.45,
"learning_rate": 1.1519425655315939e-05,
"loss": 3.0572,
"step": 16000
},
{
"epoch": 3.46,
"learning_rate": 1.1489996010713667e-05,
"loss": 3.0275,
"step": 16010
},
{
"epoch": 3.46,
"learning_rate": 1.1460592786688887e-05,
"loss": 3.0097,
"step": 16020
},
{
"epoch": 3.46,
"learning_rate": 1.1431216040743442e-05,
"loss": 3.0145,
"step": 16030
},
{
"epoch": 3.46,
"learning_rate": 1.1401865830327397e-05,
"loss": 3.053,
"step": 16040
},
{
"epoch": 3.47,
"learning_rate": 1.1372542212838919e-05,
"loss": 3.0638,
"step": 16050
},
{
"epoch": 3.47,
"learning_rate": 1.1343245245624176e-05,
"loss": 3.0592,
"step": 16060
},
{
"epoch": 3.47,
"learning_rate": 1.1313974985977216e-05,
"loss": 3.0128,
"step": 16070
},
{
"epoch": 3.47,
"learning_rate": 1.1284731491139849e-05,
"loss": 3.0444,
"step": 16080
},
{
"epoch": 3.47,
"learning_rate": 1.1255514818301543e-05,
"loss": 3.0062,
"step": 16090
},
{
"epoch": 3.48,
"learning_rate": 1.1226325024599337e-05,
"loss": 3.0262,
"step": 16100
},
{
"epoch": 3.48,
"learning_rate": 1.1197162167117677e-05,
"loss": 3.0214,
"step": 16110
},
{
"epoch": 3.48,
"learning_rate": 1.1168026302888338e-05,
"loss": 3.0697,
"step": 16120
},
{
"epoch": 3.48,
"learning_rate": 1.113891748889031e-05,
"loss": 3.0522,
"step": 16130
},
{
"epoch": 3.48,
"learning_rate": 1.110983578204968e-05,
"loss": 3.0469,
"step": 16140
},
{
"epoch": 3.49,
"learning_rate": 1.1080781239239522e-05,
"loss": 3.061,
"step": 16150
},
{
"epoch": 3.49,
"learning_rate": 1.1051753917279791e-05,
"loss": 3.0644,
"step": 16160
},
{
"epoch": 3.49,
"learning_rate": 1.102275387293721e-05,
"loss": 3.0725,
"step": 16170
},
{
"epoch": 3.49,
"learning_rate": 1.0993781162925152e-05,
"loss": 3.0252,
"step": 16180
},
{
"epoch": 3.5,
"learning_rate": 1.0964835843903534e-05,
"loss": 3.0684,
"step": 16190
},
{
"epoch": 3.5,
"learning_rate": 1.093591797247871e-05,
"loss": 3.0862,
"step": 16200
},
{
"epoch": 3.5,
"learning_rate": 1.0907027605203355e-05,
"loss": 3.0325,
"step": 16210
},
{
"epoch": 3.5,
"learning_rate": 1.0878164798576346e-05,
"loss": 3.1146,
"step": 16220
},
{
"epoch": 3.5,
"learning_rate": 1.0849329609042689e-05,
"loss": 3.0475,
"step": 16230
},
{
"epoch": 3.51,
"learning_rate": 1.0820522092993355e-05,
"loss": 3.0469,
"step": 16240
},
{
"epoch": 3.51,
"learning_rate": 1.0791742306765205e-05,
"loss": 3.0327,
"step": 16250
},
{
"epoch": 3.51,
"learning_rate": 1.0762990306640868e-05,
"loss": 3.0678,
"step": 16260
},
{
"epoch": 3.51,
"learning_rate": 1.0734266148848641e-05,
"loss": 3.0684,
"step": 16270
},
{
"epoch": 3.51,
"learning_rate": 1.0705569889562361e-05,
"loss": 3.025,
"step": 16280
},
{
"epoch": 3.52,
"learning_rate": 1.0676901584901306e-05,
"loss": 3.1052,
"step": 16290
},
{
"epoch": 3.52,
"learning_rate": 1.0648261290930106e-05,
"loss": 3.0832,
"step": 16300
},
{
"epoch": 3.52,
"learning_rate": 1.0619649063658588e-05,
"loss": 3.0804,
"step": 16310
},
{
"epoch": 3.52,
"learning_rate": 1.0591064959041702e-05,
"loss": 3.0272,
"step": 16320
},
{
"epoch": 3.53,
"learning_rate": 1.0562509032979398e-05,
"loss": 3.0792,
"step": 16330
},
{
"epoch": 3.53,
"learning_rate": 1.0533981341316518e-05,
"loss": 3.0582,
"step": 16340
},
{
"epoch": 3.53,
"learning_rate": 1.050548193984269e-05,
"loss": 3.0292,
"step": 16350
},
{
"epoch": 3.53,
"learning_rate": 1.0477010884292218e-05,
"loss": 3.0363,
"step": 16360
},
{
"epoch": 3.53,
"learning_rate": 1.0448568230343967e-05,
"loss": 3.0162,
"step": 16370
},
{
"epoch": 3.54,
"learning_rate": 1.042015403362126e-05,
"loss": 3.0717,
"step": 16380
},
{
"epoch": 3.54,
"learning_rate": 1.0391768349691774e-05,
"loss": 3.0834,
"step": 16390
},
{
"epoch": 3.54,
"learning_rate": 1.0363411234067424e-05,
"loss": 3.0954,
"step": 16400
},
{
"epoch": 3.54,
"learning_rate": 1.0335082742204249e-05,
"loss": 3.1055,
"step": 16410
},
{
"epoch": 3.54,
"learning_rate": 1.0306782929502318e-05,
"loss": 3.0769,
"step": 16420
},
{
"epoch": 3.55,
"learning_rate": 1.0278511851305608e-05,
"loss": 3.0373,
"step": 16430
},
{
"epoch": 3.55,
"learning_rate": 1.0250269562901907e-05,
"loss": 3.0572,
"step": 16440
},
{
"epoch": 3.55,
"learning_rate": 1.02220561195227e-05,
"loss": 2.9998,
"step": 16450
},
{
"epoch": 3.55,
"learning_rate": 1.0193871576343062e-05,
"loss": 3.0439,
"step": 16460
},
{
"epoch": 3.56,
"learning_rate": 1.0165715988481545e-05,
"loss": 3.0393,
"step": 16470
},
{
"epoch": 3.56,
"learning_rate": 1.0137589411000079e-05,
"loss": 3.0156,
"step": 16480
},
{
"epoch": 3.56,
"learning_rate": 1.0109491898903863e-05,
"loss": 3.0427,
"step": 16490
},
{
"epoch": 3.56,
"learning_rate": 1.008142350714124e-05,
"loss": 3.0648,
"step": 16500
},
{
"epoch": 3.56,
"learning_rate": 1.005338429060364e-05,
"loss": 3.0658,
"step": 16510
},
{
"epoch": 3.57,
"learning_rate": 1.0025374304125399e-05,
"loss": 3.0478,
"step": 16520
},
{
"epoch": 3.57,
"learning_rate": 9.997393602483705e-06,
"loss": 3.0559,
"step": 16530
},
{
"epoch": 3.57,
"learning_rate": 9.969442240398474e-06,
"loss": 3.0437,
"step": 16540
},
{
"epoch": 3.57,
"learning_rate": 9.94152027253225e-06,
"loss": 3.0383,
"step": 16550
},
{
"epoch": 3.58,
"learning_rate": 9.913627753490084e-06,
"loss": 3.0485,
"step": 16560
},
{
"epoch": 3.58,
"learning_rate": 9.885764737819444e-06,
"loss": 3.0746,
"step": 16570
},
{
"epoch": 3.58,
"learning_rate": 9.857931280010094e-06,
"loss": 2.9857,
"step": 16580
},
{
"epoch": 3.58,
"learning_rate": 9.830127434493997e-06,
"loss": 3.0314,
"step": 16590
},
{
"epoch": 3.58,
"learning_rate": 9.802353255645202e-06,
"loss": 3.0557,
"step": 16600
},
{
"epoch": 3.59,
"learning_rate": 9.77460879777975e-06,
"loss": 3.0553,
"step": 16610
},
{
"epoch": 3.59,
"learning_rate": 9.746894115155547e-06,
"loss": 3.0923,
"step": 16620
},
{
"epoch": 3.59,
"learning_rate": 9.719209261972279e-06,
"loss": 3.1046,
"step": 16630
},
{
"epoch": 3.59,
"learning_rate": 9.691554292371285e-06,
"loss": 3.0573,
"step": 16640
},
{
"epoch": 3.59,
"learning_rate": 9.66392926043548e-06,
"loss": 3.0554,
"step": 16650
},
{
"epoch": 3.6,
"learning_rate": 9.636334220189216e-06,
"loss": 3.0426,
"step": 16660
},
{
"epoch": 3.6,
"learning_rate": 9.608769225598193e-06,
"loss": 3.0102,
"step": 16670
},
{
"epoch": 3.6,
"learning_rate": 9.581234330569375e-06,
"loss": 3.0427,
"step": 16680
},
{
"epoch": 3.6,
"learning_rate": 9.553729588950838e-06,
"loss": 3.0588,
"step": 16690
},
{
"epoch": 3.61,
"learning_rate": 9.526255054531694e-06,
"loss": 3.0561,
"step": 16700
},
{
"epoch": 3.61,
"learning_rate": 9.498810781041986e-06,
"loss": 3.0464,
"step": 16710
},
{
"epoch": 3.61,
"learning_rate": 9.471396822152579e-06,
"loss": 3.0462,
"step": 16720
},
{
"epoch": 3.61,
"learning_rate": 9.444013231475043e-06,
"loss": 3.0231,
"step": 16730
},
{
"epoch": 3.61,
"learning_rate": 9.41666006256156e-06,
"loss": 3.0708,
"step": 16740
},
{
"epoch": 3.62,
"learning_rate": 9.389337368904849e-06,
"loss": 3.0786,
"step": 16750
},
{
"epoch": 3.62,
"learning_rate": 9.362045203937989e-06,
"loss": 3.0496,
"step": 16760
},
{
"epoch": 3.62,
"learning_rate": 9.334783621034377e-06,
"loss": 3.0493,
"step": 16770
},
{
"epoch": 3.62,
"learning_rate": 9.3075526735076e-06,
"loss": 3.0818,
"step": 16780
},
{
"epoch": 3.62,
"learning_rate": 9.280352414611332e-06,
"loss": 3.0362,
"step": 16790
},
{
"epoch": 3.63,
"learning_rate": 9.25318289753923e-06,
"loss": 3.0442,
"step": 16800
},
{
"epoch": 3.63,
"learning_rate": 9.22604417542484e-06,
"loss": 3.0519,
"step": 16810
},
{
"epoch": 3.63,
"learning_rate": 9.19893630134147e-06,
"loss": 3.0601,
"step": 16820
},
{
"epoch": 3.63,
"learning_rate": 9.171859328302112e-06,
"loss": 3.0114,
"step": 16830
},
{
"epoch": 3.64,
"learning_rate": 9.144813309259328e-06,
"loss": 3.0107,
"step": 16840
},
{
"epoch": 3.64,
"learning_rate": 9.117798297105135e-06,
"loss": 3.0195,
"step": 16850
},
{
"epoch": 3.64,
"learning_rate": 9.09081434467092e-06,
"loss": 3.0925,
"step": 16860
},
{
"epoch": 3.64,
"learning_rate": 9.063861504727326e-06,
"loss": 3.0653,
"step": 16870
},
{
"epoch": 3.64,
"learning_rate": 9.03693982998415e-06,
"loss": 3.0273,
"step": 16880
},
{
"epoch": 3.65,
"learning_rate": 9.010049373090252e-06,
"loss": 3.0293,
"step": 16890
},
{
"epoch": 3.65,
"learning_rate": 8.983190186633422e-06,
"loss": 3.0186,
"step": 16900
},
{
"epoch": 3.65,
"learning_rate": 8.956362323140307e-06,
"loss": 3.038,
"step": 16910
},
{
"epoch": 3.65,
"learning_rate": 8.929565835076312e-06,
"loss": 3.0942,
"step": 16920
},
{
"epoch": 3.66,
"learning_rate": 8.90280077484546e-06,
"loss": 3.0066,
"step": 16930
},
{
"epoch": 3.66,
"learning_rate": 8.876067194790325e-06,
"loss": 3.0067,
"step": 16940
},
{
"epoch": 3.66,
"learning_rate": 8.849365147191915e-06,
"loss": 3.0401,
"step": 16950
},
{
"epoch": 3.66,
"learning_rate": 8.822694684269569e-06,
"loss": 3.0869,
"step": 16960
},
{
"epoch": 3.66,
"learning_rate": 8.796055858180862e-06,
"loss": 3.0214,
"step": 16970
},
{
"epoch": 3.67,
"learning_rate": 8.7694487210215e-06,
"loss": 3.0488,
"step": 16980
},
{
"epoch": 3.67,
"learning_rate": 8.742873324825213e-06,
"loss": 3.0232,
"step": 16990
},
{
"epoch": 3.67,
"learning_rate": 8.716329721563662e-06,
"loss": 3.0763,
"step": 17000
},
{
"epoch": 3.67,
"learning_rate": 8.689817963146327e-06,
"loss": 3.0195,
"step": 17010
},
{
"epoch": 3.67,
"learning_rate": 8.663338101420414e-06,
"loss": 3.0491,
"step": 17020
},
{
"epoch": 3.68,
"learning_rate": 8.636890188170757e-06,
"loss": 3.0477,
"step": 17030
},
{
"epoch": 3.68,
"learning_rate": 8.610474275119702e-06,
"loss": 3.0163,
"step": 17040
},
{
"epoch": 3.68,
"learning_rate": 8.584090413927014e-06,
"loss": 3.0518,
"step": 17050
},
{
"epoch": 3.68,
"learning_rate": 8.557738656189784e-06,
"loss": 3.0356,
"step": 17060
},
{
"epoch": 3.69,
"learning_rate": 8.531419053442315e-06,
"loss": 3.0473,
"step": 17070
},
{
"epoch": 3.69,
"learning_rate": 8.505131657156032e-06,
"loss": 3.0072,
"step": 17080
},
{
"epoch": 3.69,
"learning_rate": 8.478876518739364e-06,
"loss": 3.0617,
"step": 17090
},
{
"epoch": 3.69,
"learning_rate": 8.45265368953767e-06,
"loss": 3.0753,
"step": 17100
},
{
"epoch": 3.69,
"learning_rate": 8.426463220833109e-06,
"loss": 3.0968,
"step": 17110
},
{
"epoch": 3.7,
"learning_rate": 8.400305163844577e-06,
"loss": 3.1078,
"step": 17120
},
{
"epoch": 3.7,
"learning_rate": 8.374179569727563e-06,
"loss": 3.0648,
"step": 17130
},
{
"epoch": 3.7,
"learning_rate": 8.348086489574084e-06,
"loss": 3.0642,
"step": 17140
},
{
"epoch": 3.7,
"learning_rate": 8.32202597441256e-06,
"loss": 3.0434,
"step": 17150
},
{
"epoch": 3.7,
"learning_rate": 8.295998075207736e-06,
"loss": 3.0191,
"step": 17160
},
{
"epoch": 3.71,
"learning_rate": 8.270002842860569e-06,
"loss": 3.041,
"step": 17170
},
{
"epoch": 3.71,
"learning_rate": 8.24404032820813e-06,
"loss": 3.0984,
"step": 17180
},
{
"epoch": 3.71,
"learning_rate": 8.218110582023512e-06,
"loss": 3.0511,
"step": 17190
},
{
"epoch": 3.71,
"learning_rate": 8.192213655015704e-06,
"loss": 3.0083,
"step": 17200
},
{
"epoch": 3.72,
"learning_rate": 8.166349597829551e-06,
"loss": 3.0806,
"step": 17210
},
{
"epoch": 3.72,
"learning_rate": 8.140518461045588e-06,
"loss": 3.0186,
"step": 17220
},
{
"epoch": 3.72,
"learning_rate": 8.114720295179973e-06,
"loss": 3.0383,
"step": 17230
},
{
"epoch": 3.72,
"learning_rate": 8.088955150684393e-06,
"loss": 3.0436,
"step": 17240
},
{
"epoch": 3.72,
"learning_rate": 8.063223077945956e-06,
"loss": 3.0402,
"step": 17250
},
{
"epoch": 3.73,
"learning_rate": 8.037524127287083e-06,
"loss": 3.0254,
"step": 17260
},
{
"epoch": 3.73,
"learning_rate": 8.011858348965435e-06,
"loss": 3.0468,
"step": 17270
},
{
"epoch": 3.73,
"learning_rate": 7.98622579317379e-06,
"loss": 3.033,
"step": 17280
},
{
"epoch": 3.73,
"learning_rate": 7.960626510039965e-06,
"loss": 3.0599,
"step": 17290
},
{
"epoch": 3.73,
"learning_rate": 7.935060549626696e-06,
"loss": 3.0692,
"step": 17300
},
{
"epoch": 3.74,
"learning_rate": 7.909527961931562e-06,
"loss": 3.1465,
"step": 17310
},
{
"epoch": 3.74,
"learning_rate": 7.884028796886863e-06,
"loss": 3.0744,
"step": 17320
},
{
"epoch": 3.74,
"learning_rate": 7.858563104359565e-06,
"loss": 3.0674,
"step": 17330
},
{
"epoch": 3.74,
"learning_rate": 7.833130934151145e-06,
"loss": 3.0385,
"step": 17340
},
{
"epoch": 3.75,
"learning_rate": 7.807732335997537e-06,
"loss": 3.0108,
"step": 17350
},
{
"epoch": 3.75,
"learning_rate": 7.782367359569015e-06,
"loss": 3.0098,
"step": 17360
},
{
"epoch": 3.75,
"learning_rate": 7.757036054470108e-06,
"loss": 3.0531,
"step": 17370
},
{
"epoch": 3.75,
"learning_rate": 7.731738470239483e-06,
"loss": 3.0734,
"step": 17380
},
{
"epoch": 3.75,
"learning_rate": 7.70647465634988e-06,
"loss": 3.043,
"step": 17390
},
{
"epoch": 3.76,
"learning_rate": 7.681244662207979e-06,
"loss": 3.0281,
"step": 17400
},
{
"epoch": 3.76,
"learning_rate": 7.656048537154336e-06,
"loss": 2.9923,
"step": 17410
},
{
"epoch": 3.76,
"learning_rate": 7.63088633046326e-06,
"loss": 3.0395,
"step": 17420
},
{
"epoch": 3.76,
"learning_rate": 7.605758091342735e-06,
"loss": 2.9974,
"step": 17430
},
{
"epoch": 3.77,
"learning_rate": 7.580663868934315e-06,
"loss": 3.0726,
"step": 17440
},
{
"epoch": 3.77,
"learning_rate": 7.555603712313028e-06,
"loss": 3.1387,
"step": 17450
},
{
"epoch": 3.77,
"learning_rate": 7.530577670487288e-06,
"loss": 3.008,
"step": 17460
},
{
"epoch": 3.77,
"learning_rate": 7.505585792398781e-06,
"loss": 3.0526,
"step": 17470
},
{
"epoch": 3.77,
"learning_rate": 7.480628126922396e-06,
"loss": 3.1219,
"step": 17480
},
{
"epoch": 3.78,
"learning_rate": 7.455704722866105e-06,
"loss": 3.044,
"step": 17490
},
{
"epoch": 3.78,
"learning_rate": 7.430815628970881e-06,
"loss": 3.0597,
"step": 17500
},
{
"epoch": 3.78,
"learning_rate": 7.405960893910599e-06,
"loss": 3.0511,
"step": 17510
},
{
"epoch": 3.78,
"learning_rate": 7.381140566291928e-06,
"loss": 3.0602,
"step": 17520
},
{
"epoch": 3.78,
"learning_rate": 7.35635469465428e-06,
"loss": 3.0561,
"step": 17530
},
{
"epoch": 3.79,
"learning_rate": 7.331603327469658e-06,
"loss": 3.0664,
"step": 17540
},
{
"epoch": 3.79,
"learning_rate": 7.306886513142589e-06,
"loss": 3.0127,
"step": 17550
},
{
"epoch": 3.79,
"learning_rate": 7.282204300010034e-06,
"loss": 3.0184,
"step": 17560
},
{
"epoch": 3.79,
"learning_rate": 7.2575567363412894e-06,
"loss": 3.0093,
"step": 17570
},
{
"epoch": 3.8,
"learning_rate": 7.232943870337877e-06,
"loss": 3.1023,
"step": 17580
},
{
"epoch": 3.8,
"learning_rate": 7.208365750133478e-06,
"loss": 3.0619,
"step": 17590
},
{
"epoch": 3.8,
"learning_rate": 7.1838224237938125e-06,
"loss": 3.0545,
"step": 17600
},
{
"epoch": 3.8,
"learning_rate": 7.159313939316564e-06,
"loss": 3.137,
"step": 17610
},
{
"epoch": 3.8,
"learning_rate": 7.134840344631275e-06,
"loss": 3.0705,
"step": 17620
},
{
"epoch": 3.81,
"learning_rate": 7.110401687599255e-06,
"loss": 3.0324,
"step": 17630
},
{
"epoch": 3.81,
"learning_rate": 7.08599801601349e-06,
"loss": 3.0266,
"step": 17640
},
{
"epoch": 3.81,
"learning_rate": 7.061629377598542e-06,
"loss": 3.0595,
"step": 17650
},
{
"epoch": 3.81,
"learning_rate": 7.037295820010481e-06,
"loss": 3.0355,
"step": 17660
},
{
"epoch": 3.81,
"learning_rate": 7.012997390836745e-06,
"loss": 2.9801,
"step": 17670
},
{
"epoch": 3.82,
"learning_rate": 6.988734137596095e-06,
"loss": 3.0169,
"step": 17680
},
{
"epoch": 3.82,
"learning_rate": 6.964506107738486e-06,
"loss": 3.0667,
"step": 17690
},
{
"epoch": 3.82,
"learning_rate": 6.940313348644994e-06,
"loss": 3.0117,
"step": 17700
},
{
"epoch": 3.82,
"learning_rate": 6.916155907627725e-06,
"loss": 3.0461,
"step": 17710
},
{
"epoch": 3.83,
"learning_rate": 6.892033831929703e-06,
"loss": 3.0584,
"step": 17720
},
{
"epoch": 3.83,
"learning_rate": 6.8679471687247975e-06,
"loss": 3.0127,
"step": 17730
},
{
"epoch": 3.83,
"learning_rate": 6.843895965117636e-06,
"loss": 3.0464,
"step": 17740
},
{
"epoch": 3.83,
"learning_rate": 6.819880268143483e-06,
"loss": 3.0342,
"step": 17750
},
{
"epoch": 3.83,
"learning_rate": 6.795900124768168e-06,
"loss": 3.0692,
"step": 17760
},
{
"epoch": 3.84,
"learning_rate": 6.771955581887998e-06,
"loss": 3.077,
"step": 17770
},
{
"epoch": 3.84,
"learning_rate": 6.748046686329648e-06,
"loss": 3.0021,
"step": 17780
},
{
"epoch": 3.84,
"learning_rate": 6.724173484850094e-06,
"loss": 3.0601,
"step": 17790
},
{
"epoch": 3.84,
"learning_rate": 6.700336024136491e-06,
"loss": 3.046,
"step": 17800
},
{
"epoch": 3.84,
"learning_rate": 6.676534350806116e-06,
"loss": 3.0126,
"step": 17810
},
{
"epoch": 3.85,
"learning_rate": 6.652768511406246e-06,
"loss": 3.0162,
"step": 17820
},
{
"epoch": 3.85,
"learning_rate": 6.629038552414083e-06,
"loss": 3.0346,
"step": 17830
},
{
"epoch": 3.85,
"learning_rate": 6.605344520236662e-06,
"loss": 3.0149,
"step": 17840
},
{
"epoch": 3.85,
"learning_rate": 6.5816864612107595e-06,
"loss": 3.0577,
"step": 17850
},
{
"epoch": 3.86,
"learning_rate": 6.5580644216028e-06,
"loss": 3.029,
"step": 17860
},
{
"epoch": 3.86,
"learning_rate": 6.534478447608766e-06,
"loss": 3.0367,
"step": 17870
},
{
"epoch": 3.86,
"learning_rate": 6.510928585354112e-06,
"loss": 3.0446,
"step": 17880
},
{
"epoch": 3.86,
"learning_rate": 6.487414880893666e-06,
"loss": 3.0617,
"step": 17890
},
{
"epoch": 3.86,
"learning_rate": 6.463937380211555e-06,
"loss": 3.0387,
"step": 17900
},
{
"epoch": 3.87,
"learning_rate": 6.440496129221094e-06,
"loss": 3.027,
"step": 17910
},
{
"epoch": 3.87,
"learning_rate": 6.417091173764711e-06,
"loss": 3.0221,
"step": 17920
},
{
"epoch": 3.87,
"learning_rate": 6.393722559613849e-06,
"loss": 3.0637,
"step": 17930
},
{
"epoch": 3.87,
"learning_rate": 6.370390332468898e-06,
"loss": 3.0915,
"step": 17940
},
{
"epoch": 3.88,
"learning_rate": 6.347094537959067e-06,
"loss": 2.9943,
"step": 17950
},
{
"epoch": 3.88,
"learning_rate": 6.323835221642327e-06,
"loss": 3.0143,
"step": 17960
},
{
"epoch": 3.88,
"learning_rate": 6.30061242900531e-06,
"loss": 3.0351,
"step": 17970
},
{
"epoch": 3.88,
"learning_rate": 6.277426205463219e-06,
"loss": 3.0477,
"step": 17980
},
{
"epoch": 3.88,
"learning_rate": 6.254276596359742e-06,
"loss": 2.9847,
"step": 17990
},
{
"epoch": 3.89,
"learning_rate": 6.231163646966967e-06,
"loss": 3.0042,
"step": 18000
},
{
"epoch": 3.89,
"learning_rate": 6.208087402485283e-06,
"loss": 3.0593,
"step": 18010
},
{
"epoch": 3.89,
"learning_rate": 6.1850479080432984e-06,
"loss": 3.0764,
"step": 18020
},
{
"epoch": 3.89,
"learning_rate": 6.162045208697759e-06,
"loss": 3.115,
"step": 18030
},
{
"epoch": 3.89,
"learning_rate": 6.1390793494334434e-06,
"loss": 3.0792,
"step": 18040
},
{
"epoch": 3.9,
"learning_rate": 6.11615037516309e-06,
"loss": 3.0233,
"step": 18050
},
{
"epoch": 3.9,
"learning_rate": 6.093258330727306e-06,
"loss": 3.041,
"step": 18060
},
{
"epoch": 3.9,
"learning_rate": 6.070403260894472e-06,
"loss": 3.0621,
"step": 18070
},
{
"epoch": 3.9,
"learning_rate": 6.047585210360662e-06,
"loss": 3.0713,
"step": 18080
},
{
"epoch": 3.91,
"learning_rate": 6.024804223749556e-06,
"loss": 3.0191,
"step": 18090
},
{
"epoch": 3.91,
"learning_rate": 6.002060345612348e-06,
"loss": 3.0548,
"step": 18100
},
{
"epoch": 3.91,
"learning_rate": 5.979353620427655e-06,
"loss": 3.0386,
"step": 18110
},
{
"epoch": 3.91,
"learning_rate": 5.956684092601458e-06,
"loss": 3.1075,
"step": 18120
},
{
"epoch": 3.91,
"learning_rate": 5.9340518064669756e-06,
"loss": 3.0638,
"step": 18130
},
{
"epoch": 3.92,
"learning_rate": 5.911456806284596e-06,
"loss": 3.0404,
"step": 18140
},
{
"epoch": 3.92,
"learning_rate": 5.8888991362417965e-06,
"loss": 3.0817,
"step": 18150
},
{
"epoch": 3.92,
"learning_rate": 5.866378840453044e-06,
"loss": 3.0259,
"step": 18160
},
{
"epoch": 3.92,
"learning_rate": 5.843895962959719e-06,
"loss": 3.0482,
"step": 18170
},
{
"epoch": 3.92,
"learning_rate": 5.821450547730023e-06,
"loss": 3.0094,
"step": 18180
},
{
"epoch": 3.93,
"learning_rate": 5.799042638658908e-06,
"loss": 3.0122,
"step": 18190
},
{
"epoch": 3.93,
"learning_rate": 5.776672279567958e-06,
"loss": 2.9955,
"step": 18200
},
{
"epoch": 3.93,
"learning_rate": 5.7543395142053344e-06,
"loss": 3.0325,
"step": 18210
},
{
"epoch": 3.93,
"learning_rate": 5.732044386245677e-06,
"loss": 3.0582,
"step": 18220
},
{
"epoch": 3.94,
"learning_rate": 5.709786939290021e-06,
"loss": 3.0594,
"step": 18230
},
{
"epoch": 3.94,
"learning_rate": 5.687567216865711e-06,
"loss": 3.0677,
"step": 18240
},
{
"epoch": 3.94,
"learning_rate": 5.665385262426315e-06,
"loss": 3.0355,
"step": 18250
},
{
"epoch": 3.94,
"learning_rate": 5.643241119351544e-06,
"loss": 3.0319,
"step": 18260
},
{
"epoch": 3.94,
"learning_rate": 5.621134830947164e-06,
"loss": 3.0383,
"step": 18270
},
{
"epoch": 3.95,
"learning_rate": 5.5990664404449055e-06,
"loss": 3.0282,
"step": 18280
},
{
"epoch": 3.95,
"learning_rate": 5.577035991002391e-06,
"loss": 3.0562,
"step": 18290
},
{
"epoch": 3.95,
"learning_rate": 5.555043525703041e-06,
"loss": 3.0665,
"step": 18300
},
{
"epoch": 3.95,
"learning_rate": 5.533089087555995e-06,
"loss": 3.0059,
"step": 18310
},
{
"epoch": 3.96,
"learning_rate": 5.511172719496025e-06,
"loss": 3.013,
"step": 18320
},
{
"epoch": 3.96,
"learning_rate": 5.489294464383451e-06,
"loss": 2.9888,
"step": 18330
},
{
"epoch": 3.96,
"learning_rate": 5.4674543650040515e-06,
"loss": 3.0788,
"step": 18340
},
{
"epoch": 3.96,
"learning_rate": 5.445652464069007e-06,
"loss": 3.0855,
"step": 18350
},
{
"epoch": 3.96,
"learning_rate": 5.423888804214775e-06,
"loss": 3.0896,
"step": 18360
},
{
"epoch": 3.97,
"learning_rate": 5.402163428003038e-06,
"loss": 2.9985,
"step": 18370
},
{
"epoch": 3.97,
"learning_rate": 5.380476377920604e-06,
"loss": 3.0263,
"step": 18380
},
{
"epoch": 3.97,
"learning_rate": 5.358827696379334e-06,
"loss": 3.0636,
"step": 18390
},
{
"epoch": 3.97,
"learning_rate": 5.3372174257160515e-06,
"loss": 2.9904,
"step": 18400
},
{
"epoch": 3.97,
"learning_rate": 5.315645608192463e-06,
"loss": 3.0567,
"step": 18410
},
{
"epoch": 3.98,
"learning_rate": 5.2941122859950785e-06,
"loss": 3.0682,
"step": 18420
},
{
"epoch": 3.98,
"learning_rate": 5.272617501235117e-06,
"loss": 3.0177,
"step": 18430
},
{
"epoch": 3.98,
"learning_rate": 5.251161295948443e-06,
"loss": 3.0723,
"step": 18440
},
{
"epoch": 3.98,
"learning_rate": 5.229743712095467e-06,
"loss": 3.0442,
"step": 18450
},
{
"epoch": 3.99,
"learning_rate": 5.208364791561071e-06,
"loss": 3.0286,
"step": 18460
},
{
"epoch": 3.99,
"learning_rate": 5.187024576154526e-06,
"loss": 3.0554,
"step": 18470
},
{
"epoch": 3.99,
"learning_rate": 5.16572310760941e-06,
"loss": 3.0437,
"step": 18480
},
{
"epoch": 3.99,
"learning_rate": 5.1444604275835305e-06,
"loss": 2.9838,
"step": 18490
},
{
"epoch": 3.99,
"learning_rate": 5.123236577658835e-06,
"loss": 3.0219,
"step": 18500
},
{
"epoch": 4.0,
"learning_rate": 5.1020515993413315e-06,
"loss": 3.0591,
"step": 18510
},
{
"epoch": 4.0,
"learning_rate": 5.080905534061014e-06,
"loss": 3.0456,
"step": 18520
},
{
"epoch": 4.0,
"eval_loss": 3.0454726219177246,
"eval_runtime": 194.385,
"eval_samples_per_second": 762.507,
"eval_steps_per_second": 23.829,
"step": 18528
},
{
"epoch": 4.0,
"learning_rate": 5.059798423171777e-06,
"loss": 2.9865,
"step": 18530
},
{
"epoch": 4.0,
"learning_rate": 5.038730307951331e-06,
"loss": 3.0307,
"step": 18540
},
{
"epoch": 4.0,
"learning_rate": 5.0177012296011185e-06,
"loss": 3.0563,
"step": 18550
},
{
"epoch": 4.01,
"learning_rate": 4.996711229246268e-06,
"loss": 3.0752,
"step": 18560
},
{
"epoch": 4.01,
"learning_rate": 4.975760347935454e-06,
"loss": 3.0579,
"step": 18570
},
{
"epoch": 4.01,
"learning_rate": 4.954848626640865e-06,
"loss": 3.07,
"step": 18580
},
{
"epoch": 4.01,
"learning_rate": 4.933976106258104e-06,
"loss": 3.0167,
"step": 18590
},
{
"epoch": 4.02,
"learning_rate": 4.913142827606107e-06,
"loss": 3.005,
"step": 18600
},
{
"epoch": 4.02,
"learning_rate": 4.892348831427077e-06,
"loss": 3.0494,
"step": 18610
},
{
"epoch": 4.02,
"learning_rate": 4.871594158386386e-06,
"loss": 3.0751,
"step": 18620
},
{
"epoch": 4.02,
"learning_rate": 4.850878849072505e-06,
"loss": 3.0289,
"step": 18630
},
{
"epoch": 4.02,
"learning_rate": 4.830202943996937e-06,
"loss": 3.0331,
"step": 18640
},
{
"epoch": 4.03,
"learning_rate": 4.809566483594108e-06,
"loss": 2.9982,
"step": 18650
},
{
"epoch": 4.03,
"learning_rate": 4.788969508221314e-06,
"loss": 3.0489,
"step": 18660
},
{
"epoch": 4.03,
"learning_rate": 4.768412058158631e-06,
"loss": 3.0287,
"step": 18670
},
{
"epoch": 4.03,
"learning_rate": 4.747894173608839e-06,
"loss": 3.0382,
"step": 18680
},
{
"epoch": 4.03,
"learning_rate": 4.727415894697338e-06,
"loss": 3.0465,
"step": 18690
},
{
"epoch": 4.04,
"learning_rate": 4.706977261472076e-06,
"loss": 3.0746,
"step": 18700
},
{
"epoch": 4.04,
"learning_rate": 4.6865783139034756e-06,
"loss": 3.048,
"step": 18710
},
{
"epoch": 4.04,
"learning_rate": 4.666219091884338e-06,
"loss": 3.0308,
"step": 18720
},
{
"epoch": 4.04,
"learning_rate": 4.645899635229786e-06,
"loss": 3.0017,
"step": 18730
},
{
"epoch": 4.05,
"learning_rate": 4.625619983677168e-06,
"loss": 3.0251,
"step": 18740
},
{
"epoch": 4.05,
"learning_rate": 4.605380176885987e-06,
"loss": 3.0564,
"step": 18750
},
{
"epoch": 4.05,
"learning_rate": 4.585180254437838e-06,
"loss": 3.0616,
"step": 18760
},
{
"epoch": 4.05,
"learning_rate": 4.565020255836305e-06,
"loss": 3.0099,
"step": 18770
},
{
"epoch": 4.05,
"learning_rate": 4.544900220506901e-06,
"loss": 3.0518,
"step": 18780
},
{
"epoch": 4.06,
"learning_rate": 4.524820187796977e-06,
"loss": 3.0556,
"step": 18790
},
{
"epoch": 4.06,
"learning_rate": 4.504780196975664e-06,
"loss": 3.0582,
"step": 18800
},
{
"epoch": 4.06,
"learning_rate": 4.484780287233778e-06,
"loss": 3.0635,
"step": 18810
},
{
"epoch": 4.06,
"learning_rate": 4.464820497683758e-06,
"loss": 3.0223,
"step": 18820
},
{
"epoch": 4.07,
"learning_rate": 4.44490086735958e-06,
"loss": 3.0759,
"step": 18830
},
{
"epoch": 4.07,
"learning_rate": 4.425021435216684e-06,
"loss": 3.0536,
"step": 18840
},
{
"epoch": 4.07,
"learning_rate": 4.405182240131891e-06,
"loss": 3.0062,
"step": 18850
},
{
"epoch": 4.07,
"learning_rate": 4.385383320903344e-06,
"loss": 3.0171,
"step": 18860
},
{
"epoch": 4.07,
"learning_rate": 4.3656247162504105e-06,
"loss": 3.0007,
"step": 18870
},
{
"epoch": 4.08,
"learning_rate": 4.345906464813628e-06,
"loss": 3.046,
"step": 18880
},
{
"epoch": 4.08,
"learning_rate": 4.326228605154611e-06,
"loss": 3.0513,
"step": 18890
},
{
"epoch": 4.08,
"learning_rate": 4.3065911757559806e-06,
"loss": 3.0251,
"step": 18900
},
{
"epoch": 4.08,
"learning_rate": 4.286994215021301e-06,
"loss": 3.023,
"step": 18910
},
{
"epoch": 4.08,
"learning_rate": 4.267437761274987e-06,
"loss": 3.0176,
"step": 18920
},
{
"epoch": 4.09,
"learning_rate": 4.247921852762235e-06,
"loss": 3.0074,
"step": 18930
},
{
"epoch": 4.09,
"learning_rate": 4.228446527648955e-06,
"loss": 3.0077,
"step": 18940
},
{
"epoch": 4.09,
"learning_rate": 4.209011824021691e-06,
"loss": 2.9964,
"step": 18950
},
{
"epoch": 4.09,
"learning_rate": 4.189617779887539e-06,
"loss": 3.0993,
"step": 18960
},
{
"epoch": 4.1,
"learning_rate": 4.170264433174093e-06,
"loss": 3.0879,
"step": 18970
},
{
"epoch": 4.1,
"learning_rate": 4.150951821729349e-06,
"loss": 3.0127,
"step": 18980
},
{
"epoch": 4.1,
"learning_rate": 4.13167998332164e-06,
"loss": 3.0037,
"step": 18990
},
{
"epoch": 4.1,
"learning_rate": 4.112448955639561e-06,
"loss": 3.0264,
"step": 19000
},
{
"epoch": 4.1,
"learning_rate": 4.093258776291903e-06,
"loss": 3.0338,
"step": 19010
},
{
"epoch": 4.11,
"learning_rate": 4.0741094828075646e-06,
"loss": 2.9888,
"step": 19020
},
{
"epoch": 4.11,
"learning_rate": 4.055001112635492e-06,
"loss": 3.0405,
"step": 19030
},
{
"epoch": 4.11,
"learning_rate": 4.035933703144598e-06,
"loss": 3.0779,
"step": 19040
},
{
"epoch": 4.11,
"learning_rate": 4.016907291623695e-06,
"loss": 3.0088,
"step": 19050
},
{
"epoch": 4.11,
"learning_rate": 3.997921915281413e-06,
"loss": 3.0392,
"step": 19060
},
{
"epoch": 4.12,
"learning_rate": 3.978977611246137e-06,
"loss": 3.0591,
"step": 19070
},
{
"epoch": 4.12,
"learning_rate": 3.960074416565929e-06,
"loss": 3.0521,
"step": 19080
},
{
"epoch": 4.12,
"learning_rate": 3.941212368208447e-06,
"loss": 3.0649,
"step": 19090
},
{
"epoch": 4.12,
"learning_rate": 3.922391503060902e-06,
"loss": 3.0837,
"step": 19100
},
{
"epoch": 4.13,
"learning_rate": 3.903611857929951e-06,
"loss": 3.0604,
"step": 19110
},
{
"epoch": 4.13,
"learning_rate": 3.884873469541642e-06,
"loss": 3.0406,
"step": 19120
},
{
"epoch": 4.13,
"learning_rate": 3.866176374541338e-06,
"loss": 3.0361,
"step": 19130
},
{
"epoch": 4.13,
"learning_rate": 3.847520609493657e-06,
"loss": 3.0607,
"step": 19140
},
{
"epoch": 4.13,
"learning_rate": 3.828906210882377e-06,
"loss": 3.0639,
"step": 19150
},
{
"epoch": 4.14,
"learning_rate": 3.810333215110387e-06,
"loss": 3.0666,
"step": 19160
},
{
"epoch": 4.14,
"learning_rate": 3.7918016584996136e-06,
"loss": 3.0383,
"step": 19170
},
{
"epoch": 4.14,
"learning_rate": 3.7733115772909307e-06,
"loss": 3.0375,
"step": 19180
},
{
"epoch": 4.14,
"learning_rate": 3.7548630076441076e-06,
"loss": 3.0097,
"step": 19190
},
{
"epoch": 4.15,
"learning_rate": 3.736455985637735e-06,
"loss": 3.0756,
"step": 19200
},
{
"epoch": 4.15,
"learning_rate": 3.7180905472691457e-06,
"loss": 3.0592,
"step": 19210
},
{
"epoch": 4.15,
"learning_rate": 3.699766728454351e-06,
"loss": 3.0135,
"step": 19220
},
{
"epoch": 4.15,
"learning_rate": 3.681484565027979e-06,
"loss": 3.0452,
"step": 19230
},
{
"epoch": 4.15,
"learning_rate": 3.6632440927431814e-06,
"loss": 3.0594,
"step": 19240
},
{
"epoch": 4.16,
"learning_rate": 3.645045347271589e-06,
"loss": 3.0343,
"step": 19250
},
{
"epoch": 4.16,
"learning_rate": 3.6268883642032236e-06,
"loss": 2.9991,
"step": 19260
},
{
"epoch": 4.16,
"learning_rate": 3.6087731790464376e-06,
"loss": 3.0261,
"step": 19270
},
{
"epoch": 4.16,
"learning_rate": 3.590699827227842e-06,
"loss": 3.0457,
"step": 19280
},
{
"epoch": 4.16,
"learning_rate": 3.5726683440922394e-06,
"loss": 3.0501,
"step": 19290
},
{
"epoch": 4.17,
"learning_rate": 3.554678764902544e-06,
"loss": 3.0199,
"step": 19300
},
{
"epoch": 4.17,
"learning_rate": 3.5367311248397343e-06,
"loss": 3.0318,
"step": 19310
},
{
"epoch": 4.17,
"learning_rate": 3.5188254590027615e-06,
"loss": 2.9988,
"step": 19320
},
{
"epoch": 4.17,
"learning_rate": 3.5009618024084924e-06,
"loss": 3.0363,
"step": 19330
},
{
"epoch": 4.18,
"learning_rate": 3.483140189991646e-06,
"loss": 3.036,
"step": 19340
},
{
"epoch": 4.18,
"learning_rate": 3.4653606566047077e-06,
"loss": 3.0258,
"step": 19350
},
{
"epoch": 4.18,
"learning_rate": 3.44762323701788e-06,
"loss": 3.0607,
"step": 19360
},
{
"epoch": 4.18,
"learning_rate": 3.429927965919e-06,
"loss": 3.0373,
"step": 19370
},
{
"epoch": 4.18,
"learning_rate": 3.4122748779134905e-06,
"loss": 3.0332,
"step": 19380
},
{
"epoch": 4.19,
"learning_rate": 3.3946640075242675e-06,
"loss": 3.0379,
"step": 19390
},
{
"epoch": 4.19,
"learning_rate": 3.377095389191684e-06,
"loss": 3.0093,
"step": 19400
},
{
"epoch": 4.19,
"learning_rate": 3.3595690572734733e-06,
"loss": 3.0827,
"step": 19410
},
{
"epoch": 4.19,
"learning_rate": 3.3420850460446627e-06,
"loss": 3.0456,
"step": 19420
},
{
"epoch": 4.19,
"learning_rate": 3.324643389697521e-06,
"loss": 3.0801,
"step": 19430
},
{
"epoch": 4.2,
"learning_rate": 3.307244122341488e-06,
"loss": 3.0707,
"step": 19440
},
{
"epoch": 4.2,
"learning_rate": 3.289887278003101e-06,
"loss": 3.0354,
"step": 19450
},
{
"epoch": 4.2,
"learning_rate": 3.2725728906259357e-06,
"loss": 3.0557,
"step": 19460
},
{
"epoch": 4.2,
"learning_rate": 3.2553009940705396e-06,
"loss": 3.0726,
"step": 19470
},
{
"epoch": 4.21,
"learning_rate": 3.2380716221143636e-06,
"loss": 3.0545,
"step": 19480
},
{
"epoch": 4.21,
"learning_rate": 3.2208848084516933e-06,
"loss": 3.0324,
"step": 19490
},
{
"epoch": 4.21,
"learning_rate": 3.203740586693588e-06,
"loss": 3.0199,
"step": 19500
},
{
"epoch": 4.21,
"learning_rate": 3.186638990367813e-06,
"loss": 3.0257,
"step": 19510
},
{
"epoch": 4.21,
"learning_rate": 3.1695800529187747e-06,
"loss": 3.0524,
"step": 19520
},
{
"epoch": 4.22,
"learning_rate": 3.152563807707451e-06,
"loss": 3.0313,
"step": 19530
},
{
"epoch": 4.22,
"learning_rate": 3.1355902880113308e-06,
"loss": 3.0431,
"step": 19540
},
{
"epoch": 4.22,
"learning_rate": 3.118659527024356e-06,
"loss": 3.0687,
"step": 19550
},
{
"epoch": 4.22,
"learning_rate": 3.1017715578568362e-06,
"loss": 3.0065,
"step": 19560
},
{
"epoch": 4.22,
"learning_rate": 3.084926413535402e-06,
"loss": 2.9737,
"step": 19570
},
{
"epoch": 4.23,
"learning_rate": 3.068124127002936e-06,
"loss": 3.0437,
"step": 19580
},
{
"epoch": 4.23,
"learning_rate": 3.0513647311185e-06,
"loss": 3.0656,
"step": 19590
},
{
"epoch": 4.23,
"learning_rate": 3.034648258657283e-06,
"loss": 2.9954,
"step": 19600
},
{
"epoch": 4.23,
"learning_rate": 3.0179747423105255e-06,
"loss": 3.0332,
"step": 19610
},
{
"epoch": 4.24,
"learning_rate": 3.001344214685478e-06,
"loss": 3.0611,
"step": 19620
},
{
"epoch": 4.24,
"learning_rate": 2.984756708305303e-06,
"loss": 3.022,
"step": 19630
},
{
"epoch": 4.24,
"learning_rate": 2.9682122556090373e-06,
"loss": 3.0146,
"step": 19640
},
{
"epoch": 4.24,
"learning_rate": 2.951710888951517e-06,
"loss": 3.0682,
"step": 19650
},
{
"epoch": 4.24,
"learning_rate": 2.9352526406033227e-06,
"loss": 2.9759,
"step": 19660
},
{
"epoch": 4.25,
"learning_rate": 2.918837542750705e-06,
"loss": 3.0547,
"step": 19670
},
{
"epoch": 4.25,
"learning_rate": 2.9024656274955377e-06,
"loss": 3.0049,
"step": 19680
},
{
"epoch": 4.25,
"learning_rate": 2.8861369268552345e-06,
"loss": 3.0891,
"step": 19690
},
{
"epoch": 4.25,
"learning_rate": 2.8698514727627053e-06,
"loss": 3.0416,
"step": 19700
},
{
"epoch": 4.26,
"learning_rate": 2.8536092970662785e-06,
"loss": 3.067,
"step": 19710
},
{
"epoch": 4.26,
"learning_rate": 2.8374104315296577e-06,
"loss": 3.0185,
"step": 19720
},
{
"epoch": 4.26,
"learning_rate": 2.821254907831833e-06,
"loss": 3.0962,
"step": 19730
},
{
"epoch": 4.26,
"learning_rate": 2.8051427575670445e-06,
"loss": 3.0583,
"step": 19740
},
{
"epoch": 4.26,
"learning_rate": 2.7890740122447077e-06,
"loss": 3.0327,
"step": 19750
},
{
"epoch": 4.27,
"learning_rate": 2.773048703289352e-06,
"loss": 3.0717,
"step": 19760
},
{
"epoch": 4.27,
"learning_rate": 2.757066862040561e-06,
"loss": 3.074,
"step": 19770
},
{
"epoch": 4.27,
"learning_rate": 2.741128519752911e-06,
"loss": 3.0281,
"step": 19780
},
{
"epoch": 4.27,
"learning_rate": 2.725233707595917e-06,
"loss": 3.0431,
"step": 19790
},
{
"epoch": 4.27,
"learning_rate": 2.7093824566539613e-06,
"loss": 2.9766,
"step": 19800
},
{
"epoch": 4.28,
"learning_rate": 2.6935747979262296e-06,
"loss": 3.0196,
"step": 19810
},
{
"epoch": 4.28,
"learning_rate": 2.6778107623266683e-06,
"loss": 3.0266,
"step": 19820
},
{
"epoch": 4.28,
"learning_rate": 2.662090380683907e-06,
"loss": 2.9949,
"step": 19830
},
{
"epoch": 4.28,
"learning_rate": 2.646413683741203e-06,
"loss": 3.0544,
"step": 19840
},
{
"epoch": 4.29,
"learning_rate": 2.630780702156388e-06,
"loss": 3.0302,
"step": 19850
},
{
"epoch": 4.29,
"learning_rate": 2.6151914665017985e-06,
"loss": 3.0381,
"step": 19860
},
{
"epoch": 4.29,
"learning_rate": 2.59964600726422e-06,
"loss": 3.0418,
"step": 19870
},
{
"epoch": 4.29,
"learning_rate": 2.584144354844831e-06,
"loss": 3.0066,
"step": 19880
},
{
"epoch": 4.29,
"learning_rate": 2.568686539559134e-06,
"loss": 3.0644,
"step": 19890
},
{
"epoch": 4.3,
"learning_rate": 2.5532725916369073e-06,
"loss": 2.9938,
"step": 19900
},
{
"epoch": 4.3,
"learning_rate": 2.5379025412221396e-06,
"loss": 3.0974,
"step": 19910
},
{
"epoch": 4.3,
"learning_rate": 2.5225764183729766e-06,
"loss": 3.0452,
"step": 19920
},
{
"epoch": 4.3,
"learning_rate": 2.5072942530616457e-06,
"loss": 2.9947,
"step": 19930
},
{
"epoch": 4.3,
"learning_rate": 2.4920560751744234e-06,
"loss": 3.0233,
"step": 19940
},
{
"epoch": 4.31,
"learning_rate": 2.4768619145115525e-06,
"loss": 3.0352,
"step": 19950
},
{
"epoch": 4.31,
"learning_rate": 2.4617118007872003e-06,
"loss": 3.0504,
"step": 19960
},
{
"epoch": 4.31,
"learning_rate": 2.446605763629398e-06,
"loss": 3.0252,
"step": 19970
},
{
"epoch": 4.31,
"learning_rate": 2.431543832579966e-06,
"loss": 3.0409,
"step": 19980
},
{
"epoch": 4.32,
"learning_rate": 2.4165260370944797e-06,
"loss": 3.0288,
"step": 19990
},
{
"epoch": 4.32,
"learning_rate": 2.401552406542207e-06,
"loss": 3.0469,
"step": 20000
},
{
"epoch": 4.32,
"learning_rate": 2.3866229702060317e-06,
"loss": 3.069,
"step": 20010
},
{
"epoch": 4.32,
"learning_rate": 2.371737757282419e-06,
"loss": 3.0277,
"step": 20020
},
{
"epoch": 4.32,
"learning_rate": 2.356896796881347e-06,
"loss": 3.0241,
"step": 20030
},
{
"epoch": 4.33,
"learning_rate": 2.3421001180262527e-06,
"loss": 3.0497,
"step": 20040
},
{
"epoch": 4.33,
"learning_rate": 2.3273477496539713e-06,
"loss": 2.999,
"step": 20050
},
{
"epoch": 4.33,
"learning_rate": 2.31263972061469e-06,
"loss": 3.0518,
"step": 20060
},
{
"epoch": 4.33,
"learning_rate": 2.2979760596718745e-06,
"loss": 3.0747,
"step": 20070
},
{
"epoch": 4.34,
"learning_rate": 2.2833567955022394e-06,
"loss": 2.9885,
"step": 20080
},
{
"epoch": 4.34,
"learning_rate": 2.2687819566956592e-06,
"loss": 3.0485,
"step": 20090
},
{
"epoch": 4.34,
"learning_rate": 2.2542515717551336e-06,
"loss": 3.0665,
"step": 20100
},
{
"epoch": 4.34,
"learning_rate": 2.23976566909673e-06,
"loss": 3.071,
"step": 20110
},
{
"epoch": 4.34,
"learning_rate": 2.225324277049526e-06,
"loss": 3.0072,
"step": 20120
},
{
"epoch": 4.35,
"learning_rate": 2.210927423855547e-06,
"loss": 3.0289,
"step": 20130
},
{
"epoch": 4.35,
"learning_rate": 2.1965751376697208e-06,
"loss": 3.0734,
"step": 20140
},
{
"epoch": 4.35,
"learning_rate": 2.182267446559816e-06,
"loss": 3.03,
"step": 20150
},
{
"epoch": 4.35,
"learning_rate": 2.1680043785063913e-06,
"loss": 2.9907,
"step": 20160
},
{
"epoch": 4.35,
"learning_rate": 2.1537859614027432e-06,
"loss": 3.0549,
"step": 20170
},
{
"epoch": 4.36,
"learning_rate": 2.139612223054843e-06,
"loss": 3.0406,
"step": 20180
},
{
"epoch": 4.36,
"learning_rate": 2.1254831911812794e-06,
"loss": 3.0698,
"step": 20190
},
{
"epoch": 4.36,
"learning_rate": 2.11139889341323e-06,
"loss": 3.0084,
"step": 20200
},
{
"epoch": 4.36,
"learning_rate": 2.0973593572943735e-06,
"loss": 3.0007,
"step": 20210
},
{
"epoch": 4.37,
"learning_rate": 2.083364610280861e-06,
"loss": 3.051,
"step": 20220
},
{
"epoch": 4.37,
"learning_rate": 2.069414679741244e-06,
"loss": 3.0615,
"step": 20230
},
{
"epoch": 4.37,
"learning_rate": 2.0555095929564337e-06,
"loss": 3.0019,
"step": 20240
},
{
"epoch": 4.37,
"learning_rate": 2.0416493771196476e-06,
"loss": 3.0209,
"step": 20250
},
{
"epoch": 4.37,
"learning_rate": 2.027834059336345e-06,
"loss": 3.0158,
"step": 20260
},
{
"epoch": 4.38,
"learning_rate": 2.014063666624186e-06,
"loss": 3.0288,
"step": 20270
},
{
"epoch": 4.38,
"learning_rate": 2.000338225912968e-06,
"loss": 3.042,
"step": 20280
},
{
"epoch": 4.38,
"learning_rate": 1.9866577640445887e-06,
"loss": 3.0453,
"step": 20290
},
{
"epoch": 4.38,
"learning_rate": 1.9730223077729757e-06,
"loss": 3.0062,
"step": 20300
},
{
"epoch": 4.38,
"learning_rate": 1.9594318837640457e-06,
"loss": 3.0103,
"step": 20310
},
{
"epoch": 4.39,
"learning_rate": 1.9458865185956466e-06,
"loss": 3.0085,
"step": 20320
},
{
"epoch": 4.39,
"learning_rate": 1.932386238757508e-06,
"loss": 3.0425,
"step": 20330
},
{
"epoch": 4.39,
"learning_rate": 1.918931070651195e-06,
"loss": 3.0467,
"step": 20340
},
{
"epoch": 4.39,
"learning_rate": 1.9055210405900443e-06,
"loss": 3.0108,
"step": 20350
},
{
"epoch": 4.4,
"learning_rate": 1.89215617479912e-06,
"loss": 3.0835,
"step": 20360
},
{
"epoch": 4.4,
"learning_rate": 1.8788364994151652e-06,
"loss": 3.0448,
"step": 20370
},
{
"epoch": 4.4,
"learning_rate": 1.865562040486543e-06,
"loss": 3.0499,
"step": 20380
},
{
"epoch": 4.4,
"learning_rate": 1.8523328239731907e-06,
"loss": 3.078,
"step": 20390
},
{
"epoch": 4.4,
"learning_rate": 1.8391488757465685e-06,
"loss": 2.996,
"step": 20400
},
{
"epoch": 4.41,
"learning_rate": 1.8260102215896163e-06,
"loss": 3.0058,
"step": 20410
},
{
"epoch": 4.41,
"learning_rate": 1.8129168871966834e-06,
"loss": 3.0447,
"step": 20420
},
{
"epoch": 4.41,
"learning_rate": 1.7998688981734956e-06,
"loss": 3.0511,
"step": 20430
},
{
"epoch": 4.41,
"learning_rate": 1.7868662800371e-06,
"loss": 3.0276,
"step": 20440
},
{
"epoch": 4.41,
"learning_rate": 1.7739090582158142e-06,
"loss": 2.9776,
"step": 20450
},
{
"epoch": 4.42,
"learning_rate": 1.7609972580491795e-06,
"loss": 3.0327,
"step": 20460
},
{
"epoch": 4.42,
"learning_rate": 1.7481309047879064e-06,
"loss": 3.0344,
"step": 20470
},
{
"epoch": 4.42,
"learning_rate": 1.7353100235938308e-06,
"loss": 3.0573,
"step": 20480
},
{
"epoch": 4.42,
"learning_rate": 1.722534639539858e-06,
"loss": 3.0018,
"step": 20490
},
{
"epoch": 4.43,
"learning_rate": 1.709804777609922e-06,
"loss": 3.0297,
"step": 20500
},
{
"epoch": 4.43,
"learning_rate": 1.6971204626989313e-06,
"loss": 3.0394,
"step": 20510
},
{
"epoch": 4.43,
"learning_rate": 1.6844817196127155e-06,
"loss": 3.0197,
"step": 20520
},
{
"epoch": 4.43,
"learning_rate": 1.6718885730679951e-06,
"loss": 3.0315,
"step": 20530
},
{
"epoch": 4.43,
"learning_rate": 1.659341047692309e-06,
"loss": 3.0369,
"step": 20540
},
{
"epoch": 4.44,
"learning_rate": 1.6468391680239836e-06,
"loss": 3.0255,
"step": 20550
},
{
"epoch": 4.44,
"learning_rate": 1.6343829585120763e-06,
"loss": 3.0541,
"step": 20560
},
{
"epoch": 4.44,
"learning_rate": 1.6219724435163314e-06,
"loss": 3.0399,
"step": 20570
},
{
"epoch": 4.44,
"learning_rate": 1.6096076473071347e-06,
"loss": 3.0176,
"step": 20580
},
{
"epoch": 4.45,
"learning_rate": 1.5972885940654575e-06,
"loss": 3.0178,
"step": 20590
},
{
"epoch": 4.45,
"learning_rate": 1.5850153078828146e-06,
"loss": 3.0553,
"step": 20600
},
{
"epoch": 4.45,
"learning_rate": 1.5727878127612283e-06,
"loss": 3.023,
"step": 20610
},
{
"epoch": 4.45,
"learning_rate": 1.5606061326131571e-06,
"loss": 3.0519,
"step": 20620
},
{
"epoch": 4.45,
"learning_rate": 1.5484702912614723e-06,
"loss": 2.9974,
"step": 20630
},
{
"epoch": 4.46,
"learning_rate": 1.5363803124393894e-06,
"loss": 3.0369,
"step": 20640
},
{
"epoch": 4.46,
"learning_rate": 1.5243362197904481e-06,
"loss": 3.0243,
"step": 20650
},
{
"epoch": 4.46,
"learning_rate": 1.5123380368684409e-06,
"loss": 3.0834,
"step": 20660
},
{
"epoch": 4.46,
"learning_rate": 1.5003857871373844e-06,
"loss": 2.9976,
"step": 20670
},
{
"epoch": 4.46,
"learning_rate": 1.488479493971462e-06,
"loss": 3.0548,
"step": 20680
},
{
"epoch": 4.47,
"learning_rate": 1.476619180654984e-06,
"loss": 2.9973,
"step": 20690
},
{
"epoch": 4.47,
"learning_rate": 1.4648048703823441e-06,
"loss": 3.0671,
"step": 20700
},
{
"epoch": 4.47,
"learning_rate": 1.453036586257972e-06,
"loss": 3.0396,
"step": 20710
},
{
"epoch": 4.47,
"learning_rate": 1.4413143512962802e-06,
"loss": 3.0585,
"step": 20720
},
{
"epoch": 4.48,
"learning_rate": 1.4296381884216308e-06,
"loss": 3.0527,
"step": 20730
},
{
"epoch": 4.48,
"learning_rate": 1.4180081204682867e-06,
"loss": 3.0313,
"step": 20740
},
{
"epoch": 4.48,
"learning_rate": 1.4064241701803649e-06,
"loss": 3.0277,
"step": 20750
},
{
"epoch": 4.48,
"learning_rate": 1.3948863602117945e-06,
"loss": 3.0199,
"step": 20760
},
{
"epoch": 4.48,
"learning_rate": 1.3833947131262682e-06,
"loss": 3.0776,
"step": 20770
},
{
"epoch": 4.49,
"learning_rate": 1.3719492513972004e-06,
"loss": 3.0539,
"step": 20780
},
{
"epoch": 4.49,
"learning_rate": 1.3605499974076923e-06,
"loss": 3.0332,
"step": 20790
},
{
"epoch": 4.49,
"learning_rate": 1.34919697345047e-06,
"loss": 3.022,
"step": 20800
},
{
"epoch": 4.49,
"learning_rate": 1.3378902017278538e-06,
"loss": 3.047,
"step": 20810
},
{
"epoch": 4.49,
"learning_rate": 1.3266297043517172e-06,
"loss": 3.0031,
"step": 20820
},
{
"epoch": 4.5,
"learning_rate": 1.315415503343434e-06,
"loss": 3.0134,
"step": 20830
},
{
"epoch": 4.5,
"learning_rate": 1.3042476206338334e-06,
"loss": 3.0063,
"step": 20840
},
{
"epoch": 4.5,
"learning_rate": 1.2931260780631727e-06,
"loss": 3.0254,
"step": 20850
},
{
"epoch": 4.5,
"learning_rate": 1.2820508973810791e-06,
"loss": 3.0246,
"step": 20860
},
{
"epoch": 4.51,
"learning_rate": 1.2710221002465189e-06,
"loss": 3.0109,
"step": 20870
},
{
"epoch": 4.51,
"learning_rate": 1.2600397082277393e-06,
"loss": 3.0084,
"step": 20880
},
{
"epoch": 4.51,
"learning_rate": 1.2491037428022489e-06,
"loss": 3.0538,
"step": 20890
},
{
"epoch": 4.51,
"learning_rate": 1.2382142253567513e-06,
"loss": 3.0405,
"step": 20900
},
{
"epoch": 4.51,
"learning_rate": 1.2273711771871255e-06,
"loss": 3.0142,
"step": 20910
},
{
"epoch": 4.52,
"learning_rate": 1.2165746194983646e-06,
"loss": 3.046,
"step": 20920
},
{
"epoch": 4.52,
"learning_rate": 1.205824573404546e-06,
"loss": 3.0742,
"step": 20930
},
{
"epoch": 4.52,
"learning_rate": 1.1951210599287943e-06,
"loss": 2.9801,
"step": 20940
},
{
"epoch": 4.52,
"learning_rate": 1.184464100003224e-06,
"loss": 3.0401,
"step": 20950
},
{
"epoch": 4.53,
"learning_rate": 1.1738537144689116e-06,
"loss": 3.0078,
"step": 20960
},
{
"epoch": 4.53,
"learning_rate": 1.1632899240758554e-06,
"loss": 3.0189,
"step": 20970
},
{
"epoch": 4.53,
"learning_rate": 1.152772749482925e-06,
"loss": 2.9714,
"step": 20980
},
{
"epoch": 4.53,
"learning_rate": 1.1423022112578312e-06,
"loss": 3.0306,
"step": 20990
},
{
"epoch": 4.53,
"learning_rate": 1.1318783298770784e-06,
"loss": 3.0823,
"step": 21000
},
{
"epoch": 4.54,
"learning_rate": 1.1215011257259266e-06,
"loss": 3.0749,
"step": 21010
},
{
"epoch": 4.54,
"learning_rate": 1.111170619098356e-06,
"loss": 3.0678,
"step": 21020
},
{
"epoch": 4.54,
"learning_rate": 1.1008868301970205e-06,
"loss": 3.0619,
"step": 21030
},
{
"epoch": 4.54,
"learning_rate": 1.0906497791332127e-06,
"loss": 3.0172,
"step": 21040
},
{
"epoch": 4.54,
"learning_rate": 1.0804594859268213e-06,
"loss": 3.0106,
"step": 21050
},
{
"epoch": 4.55,
"learning_rate": 1.0703159705062998e-06,
"loss": 2.9689,
"step": 21060
},
{
"epoch": 4.55,
"learning_rate": 1.0602192527086163e-06,
"loss": 3.0145,
"step": 21070
},
{
"epoch": 4.55,
"learning_rate": 1.0501693522792205e-06,
"loss": 3.0696,
"step": 21080
},
{
"epoch": 4.55,
"learning_rate": 1.0401662888720049e-06,
"loss": 3.0166,
"step": 21090
},
{
"epoch": 4.56,
"learning_rate": 1.0302100820492684e-06,
"loss": 3.0319,
"step": 21100
},
{
"epoch": 4.56,
"learning_rate": 1.020300751281672e-06,
"loss": 3.0396,
"step": 21110
},
{
"epoch": 4.56,
"learning_rate": 1.0104383159482062e-06,
"loss": 3.0682,
"step": 21120
},
{
"epoch": 4.56,
"learning_rate": 1.0006227953361535e-06,
"loss": 3.0346,
"step": 21130
},
{
"epoch": 4.56,
"learning_rate": 9.908542086410428e-07,
"loss": 3.0435,
"step": 21140
},
{
"epoch": 4.57,
"learning_rate": 9.811325749666283e-07,
"loss": 2.9827,
"step": 21150
},
{
"epoch": 4.57,
"learning_rate": 9.714579133248274e-07,
"loss": 3.0127,
"step": 21160
},
{
"epoch": 4.57,
"learning_rate": 9.618302426357085e-07,
"loss": 3.0431,
"step": 21170
},
{
"epoch": 4.57,
"learning_rate": 9.52249581727438e-07,
"loss": 3.0331,
"step": 21180
},
{
"epoch": 4.57,
"learning_rate": 9.427159493362481e-07,
"loss": 3.0888,
"step": 21190
},
{
"epoch": 4.58,
"learning_rate": 9.332293641064055e-07,
"loss": 3.0355,
"step": 21200
},
{
"epoch": 4.58,
"learning_rate": 9.237898445901672e-07,
"loss": 3.0158,
"step": 21210
},
{
"epoch": 4.58,
"learning_rate": 9.143974092477386e-07,
"loss": 3.0575,
"step": 21220
},
{
"epoch": 4.58,
"learning_rate": 9.050520764472658e-07,
"loss": 3.0386,
"step": 21230
},
{
"epoch": 4.59,
"learning_rate": 8.957538644647601e-07,
"loss": 3.0041,
"step": 21240
},
{
"epoch": 4.59,
"learning_rate": 8.865027914840923e-07,
"loss": 2.9995,
"step": 21250
},
{
"epoch": 4.59,
"learning_rate": 8.772988755969436e-07,
"loss": 3.034,
"step": 21260
},
{
"epoch": 4.59,
"learning_rate": 8.681421348027713e-07,
"loss": 3.0114,
"step": 21270
},
{
"epoch": 4.59,
"learning_rate": 8.590325870087817e-07,
"loss": 3.0492,
"step": 21280
},
{
"epoch": 4.6,
"learning_rate": 8.499702500298829e-07,
"loss": 3.0267,
"step": 21290
},
{
"epoch": 4.6,
"learning_rate": 8.409551415886591e-07,
"loss": 3.0024,
"step": 21300
},
{
"epoch": 4.6,
"learning_rate": 8.319872793153355e-07,
"loss": 3.0313,
"step": 21310
},
{
"epoch": 4.6,
"learning_rate": 8.230666807477333e-07,
"loss": 3.0003,
"step": 21320
},
{
"epoch": 4.6,
"learning_rate": 8.141933633312504e-07,
"loss": 3.0757,
"step": 21330
},
{
"epoch": 4.61,
"learning_rate": 8.053673444188197e-07,
"loss": 3.0307,
"step": 21340
},
{
"epoch": 4.61,
"learning_rate": 7.965886412708707e-07,
"loss": 3.0076,
"step": 21350
},
{
"epoch": 4.61,
"learning_rate": 7.878572710553062e-07,
"loss": 3.0573,
"step": 21360
},
{
"epoch": 4.61,
"learning_rate": 7.791732508474592e-07,
"loss": 3.0526,
"step": 21370
},
{
"epoch": 4.62,
"learning_rate": 7.7053659763007e-07,
"loss": 3.0403,
"step": 21380
},
{
"epoch": 4.62,
"learning_rate": 7.61947328293236e-07,
"loss": 3.0372,
"step": 21390
},
{
"epoch": 4.62,
"learning_rate": 7.534054596344015e-07,
"loss": 3.0721,
"step": 21400
},
{
"epoch": 4.62,
"learning_rate": 7.44911008358301e-07,
"loss": 3.033,
"step": 21410
},
{
"epoch": 4.62,
"learning_rate": 7.364639910769438e-07,
"loss": 3.0855,
"step": 21420
},
{
"epoch": 4.63,
"learning_rate": 7.280644243095825e-07,
"loss": 3.046,
"step": 21430
},
{
"epoch": 4.63,
"learning_rate": 7.197123244826603e-07,
"loss": 3.0089,
"step": 21440
},
{
"epoch": 4.63,
"learning_rate": 7.11407707929801e-07,
"loss": 3.1103,
"step": 21450
},
{
"epoch": 4.63,
"learning_rate": 7.031505908917685e-07,
"loss": 3.0203,
"step": 21460
},
{
"epoch": 4.64,
"learning_rate": 6.949409895164294e-07,
"loss": 3.0508,
"step": 21470
},
{
"epoch": 4.64,
"learning_rate": 6.867789198587382e-07,
"loss": 3.0423,
"step": 21480
},
{
"epoch": 4.64,
"learning_rate": 6.78664397880685e-07,
"loss": 3.0054,
"step": 21490
},
{
"epoch": 4.64,
"learning_rate": 6.705974394512732e-07,
"loss": 3.0714,
"step": 21500
},
{
"epoch": 4.64,
"learning_rate": 6.625780603464998e-07,
"loss": 3.082,
"step": 21510
},
{
"epoch": 4.65,
"learning_rate": 6.54606276249306e-07,
"loss": 2.9895,
"step": 21520
},
{
"epoch": 4.65,
"learning_rate": 6.466821027495573e-07,
"loss": 3.0519,
"step": 21530
},
{
"epoch": 4.65,
"learning_rate": 6.388055553440103e-07,
"loss": 3.0362,
"step": 21540
},
{
"epoch": 4.65,
"learning_rate": 6.309766494362823e-07,
"loss": 3.0237,
"step": 21550
},
{
"epoch": 4.65,
"learning_rate": 6.231954003368178e-07,
"loss": 3.0413,
"step": 21560
},
{
"epoch": 4.66,
"learning_rate": 6.154618232628723e-07,
"loss": 3.0303,
"step": 21570
},
{
"epoch": 4.66,
"learning_rate": 6.077759333384614e-07,
"loss": 3.0201,
"step": 21580
},
{
"epoch": 4.66,
"learning_rate": 6.001377455943452e-07,
"loss": 3.0257,
"step": 21590
},
{
"epoch": 4.66,
"learning_rate": 5.925472749680028e-07,
"loss": 3.0073,
"step": 21600
},
{
"epoch": 4.67,
"learning_rate": 5.850045363035878e-07,
"loss": 3.08,
"step": 21610
},
{
"epoch": 4.67,
"learning_rate": 5.775095443519119e-07,
"loss": 3.0569,
"step": 21620
},
{
"epoch": 4.67,
"learning_rate": 5.700623137704087e-07,
"loss": 3.0756,
"step": 21630
},
{
"epoch": 4.67,
"learning_rate": 5.626628591231115e-07,
"loss": 2.9925,
"step": 21640
},
{
"epoch": 4.67,
"learning_rate": 5.553111948806227e-07,
"loss": 3.0152,
"step": 21650
},
{
"epoch": 4.68,
"learning_rate": 5.48007335420081e-07,
"loss": 3.0232,
"step": 21660
},
{
"epoch": 4.68,
"learning_rate": 5.407512950251381e-07,
"loss": 3.037,
"step": 21670
},
{
"epoch": 4.68,
"learning_rate": 5.335430878859238e-07,
"loss": 3.0561,
"step": 21680
},
{
"epoch": 4.68,
"learning_rate": 5.263827280990341e-07,
"loss": 3.042,
"step": 21690
},
{
"epoch": 4.68,
"learning_rate": 5.192702296674873e-07,
"loss": 3.0384,
"step": 21700
},
{
"epoch": 4.69,
"learning_rate": 5.122056065007013e-07,
"loss": 3.0211,
"step": 21710
},
{
"epoch": 4.69,
"learning_rate": 5.051888724144693e-07,
"loss": 3.0336,
"step": 21720
},
{
"epoch": 4.69,
"learning_rate": 4.98220041130934e-07,
"loss": 3.0841,
"step": 21730
},
{
"epoch": 4.69,
"learning_rate": 4.912991262785521e-07,
"loss": 2.975,
"step": 21740
},
{
"epoch": 4.7,
"learning_rate": 4.844261413920804e-07,
"loss": 3.0994,
"step": 21750
},
{
"epoch": 4.7,
"learning_rate": 4.776010999125335e-07,
"loss": 3.0243,
"step": 21760
},
{
"epoch": 4.7,
"learning_rate": 4.70824015187174e-07,
"loss": 2.9907,
"step": 21770
},
{
"epoch": 4.7,
"learning_rate": 4.640949004694778e-07,
"loss": 3.0567,
"step": 21780
},
{
"epoch": 4.7,
"learning_rate": 4.5741376891910717e-07,
"loss": 3.0485,
"step": 21790
},
{
"epoch": 4.71,
"learning_rate": 4.507806336018855e-07,
"loss": 3.0302,
"step": 21800
},
{
"epoch": 4.71,
"learning_rate": 4.44195507489778e-07,
"loss": 2.9561,
"step": 21810
},
{
"epoch": 4.71,
"learning_rate": 4.376584034608583e-07,
"loss": 3.0077,
"step": 21820
},
{
"epoch": 4.71,
"learning_rate": 4.3116933429928065e-07,
"loss": 3.0666,
"step": 21830
},
{
"epoch": 4.72,
"learning_rate": 4.2472831269527724e-07,
"loss": 3.0037,
"step": 21840
},
{
"epoch": 4.72,
"learning_rate": 4.183353512451055e-07,
"loss": 3.0503,
"step": 21850
},
{
"epoch": 4.72,
"learning_rate": 4.1199046245103123e-07,
"loss": 3.0393,
"step": 21860
},
{
"epoch": 4.72,
"learning_rate": 4.056936587213178e-07,
"loss": 3.0015,
"step": 21870
},
{
"epoch": 4.72,
"learning_rate": 3.994449523701843e-07,
"loss": 3.0589,
"step": 21880
},
{
"epoch": 4.73,
"learning_rate": 3.9324435561779726e-07,
"loss": 3.0104,
"step": 21890
},
{
"epoch": 4.73,
"learning_rate": 3.8709188059022627e-07,
"loss": 3.0702,
"step": 21900
},
{
"epoch": 4.73,
"learning_rate": 3.8098753931944396e-07,
"loss": 3.0534,
"step": 21910
},
{
"epoch": 4.73,
"learning_rate": 3.7493134374329e-07,
"loss": 2.9991,
"step": 21920
},
{
"epoch": 4.73,
"learning_rate": 3.689233057054403e-07,
"loss": 3.0207,
"step": 21930
},
{
"epoch": 4.74,
"learning_rate": 3.6296343695539915e-07,
"loss": 3.0348,
"step": 21940
},
{
"epoch": 4.74,
"learning_rate": 3.5705174914847373e-07,
"loss": 3.0398,
"step": 21950
},
{
"epoch": 4.74,
"learning_rate": 3.5118825384573283e-07,
"loss": 2.9959,
"step": 21960
},
{
"epoch": 4.74,
"learning_rate": 3.453729625140151e-07,
"loss": 3.0605,
"step": 21970
},
{
"epoch": 4.75,
"learning_rate": 3.396058865258789e-07,
"loss": 3.006,
"step": 21980
},
{
"epoch": 4.75,
"learning_rate": 3.338870371595942e-07,
"loss": 2.9907,
"step": 21990
},
{
"epoch": 4.75,
"learning_rate": 3.2821642559912044e-07,
"loss": 3.0006,
"step": 22000
},
{
"epoch": 4.75,
"learning_rate": 3.225940629340757e-07,
"loss": 3.0078,
"step": 22010
},
{
"epoch": 4.75,
"learning_rate": 3.1701996015972847e-07,
"loss": 3.0516,
"step": 22020
},
{
"epoch": 4.76,
"learning_rate": 3.114941281769618e-07,
"loss": 3.0552,
"step": 22030
},
{
"epoch": 4.76,
"learning_rate": 3.060165777922619e-07,
"loss": 3.03,
"step": 22040
},
{
"epoch": 4.76,
"learning_rate": 3.0058731971769894e-07,
"loss": 3.0479,
"step": 22050
},
{
"epoch": 4.76,
"learning_rate": 2.952063645708908e-07,
"loss": 3.0336,
"step": 22060
},
{
"epoch": 4.76,
"learning_rate": 2.8987372287499757e-07,
"loss": 3.0098,
"step": 22070
},
{
"epoch": 4.77,
"learning_rate": 2.8458940505870224e-07,
"loss": 3.0006,
"step": 22080
},
{
"epoch": 4.77,
"learning_rate": 2.793534214561744e-07,
"loss": 3.0279,
"step": 22090
},
{
"epoch": 4.77,
"learning_rate": 2.741657823070648e-07,
"loss": 3.0492,
"step": 22100
},
{
"epoch": 4.77,
"learning_rate": 2.690264977564777e-07,
"loss": 3.0646,
"step": 22110
},
{
"epoch": 4.78,
"learning_rate": 2.639355778549568e-07,
"loss": 2.9875,
"step": 22120
},
{
"epoch": 4.78,
"learning_rate": 2.588930325584632e-07,
"loss": 3.0109,
"step": 22130
},
{
"epoch": 4.78,
"learning_rate": 2.5389887172835024e-07,
"loss": 3.0181,
"step": 22140
},
{
"epoch": 4.78,
"learning_rate": 2.4895310513135263e-07,
"loss": 3.0555,
"step": 22150
},
{
"epoch": 4.78,
"learning_rate": 2.4405574243956117e-07,
"loss": 3.006,
"step": 22160
},
{
"epoch": 4.79,
"learning_rate": 2.3920679323041207e-07,
"loss": 3.0852,
"step": 22170
},
{
"epoch": 4.79,
"learning_rate": 2.3440626698665312e-07,
"loss": 3.0004,
"step": 22180
},
{
"epoch": 4.79,
"learning_rate": 2.2965417309634695e-07,
"loss": 3.0681,
"step": 22190
},
{
"epoch": 4.79,
"learning_rate": 2.2495052085282908e-07,
"loss": 3.0736,
"step": 22200
},
{
"epoch": 4.79,
"learning_rate": 2.202953194547108e-07,
"loss": 2.9551,
"step": 22210
},
{
"epoch": 4.8,
"learning_rate": 2.1568857800584042e-07,
"loss": 3.0467,
"step": 22220
},
{
"epoch": 4.8,
"learning_rate": 2.1113030551530576e-07,
"loss": 3.0229,
"step": 22230
},
{
"epoch": 4.8,
"learning_rate": 2.0662051089740396e-07,
"loss": 3.0546,
"step": 22240
},
{
"epoch": 4.8,
"learning_rate": 2.0215920297162739e-07,
"loss": 2.9941,
"step": 22250
},
{
"epoch": 4.81,
"learning_rate": 1.9774639046264698e-07,
"loss": 3.011,
"step": 22260
},
{
"epoch": 4.81,
"learning_rate": 1.9338208200029295e-07,
"loss": 3.0244,
"step": 22270
},
{
"epoch": 4.81,
"learning_rate": 1.890662861195408e-07,
"loss": 3.0414,
"step": 22280
},
{
"epoch": 4.81,
"learning_rate": 1.847990112604947e-07,
"loss": 3.0303,
"step": 22290
},
{
"epoch": 4.81,
"learning_rate": 1.805802657683653e-07,
"loss": 3.0888,
"step": 22300
},
{
"epoch": 4.82,
"learning_rate": 1.7641005789346134e-07,
"loss": 3.021,
"step": 22310
},
{
"epoch": 4.82,
"learning_rate": 1.722883957911703e-07,
"loss": 3.0116,
"step": 22320
},
{
"epoch": 4.82,
"learning_rate": 1.682152875219417e-07,
"loss": 2.9938,
"step": 22330
},
{
"epoch": 4.82,
"learning_rate": 1.6419074105126776e-07,
"loss": 3.042,
"step": 22340
},
{
"epoch": 4.83,
"learning_rate": 1.6021476424967485e-07,
"loss": 3.0589,
"step": 22350
},
{
"epoch": 4.83,
"learning_rate": 1.5628736489270713e-07,
"loss": 3.0067,
"step": 22360
},
{
"epoch": 4.83,
"learning_rate": 1.5240855066090686e-07,
"loss": 3.0398,
"step": 22370
},
{
"epoch": 4.83,
"learning_rate": 1.4857832913980075e-07,
"loss": 3.0409,
"step": 22380
},
{
"epoch": 4.83,
"learning_rate": 1.4479670781988863e-07,
"loss": 3.0694,
"step": 22390
},
{
"epoch": 4.84,
"learning_rate": 1.4106369409662422e-07,
"loss": 3.0601,
"step": 22400
},
{
"epoch": 4.84,
"learning_rate": 1.3737929527040117e-07,
"loss": 3.0397,
"step": 22410
},
{
"epoch": 4.84,
"learning_rate": 1.3374351854654466e-07,
"loss": 3.0494,
"step": 22420
},
{
"epoch": 4.84,
"learning_rate": 1.3015637103529487e-07,
"loss": 3.0838,
"step": 22430
},
{
"epoch": 4.84,
"learning_rate": 1.266178597517792e-07,
"loss": 3.0292,
"step": 22440
},
{
"epoch": 4.85,
"learning_rate": 1.2312799161602606e-07,
"loss": 3.0388,
"step": 22450
},
{
"epoch": 4.85,
"learning_rate": 1.1968677345292612e-07,
"loss": 2.9938,
"step": 22460
},
{
"epoch": 4.85,
"learning_rate": 1.1629421199222667e-07,
"loss": 3.1058,
"step": 22470
},
{
"epoch": 4.85,
"learning_rate": 1.1295031386853171e-07,
"loss": 3.0278,
"step": 22480
},
{
"epoch": 4.86,
"learning_rate": 1.0965508562126581e-07,
"loss": 3.0636,
"step": 22490
},
{
"epoch": 4.86,
"learning_rate": 1.0640853369467963e-07,
"loss": 3.0291,
"step": 22500
},
{
"epoch": 4.86,
"learning_rate": 1.0321066443783056e-07,
"loss": 3.0297,
"step": 22510
},
{
"epoch": 4.86,
"learning_rate": 1.0006148410456883e-07,
"loss": 3.0296,
"step": 22520
},
{
"epoch": 4.86,
"learning_rate": 9.696099885352916e-08,
"loss": 3.0684,
"step": 22530
},
{
"epoch": 4.87,
"learning_rate": 9.390921474811409e-08,
"loss": 2.9799,
"step": 22540
},
{
"epoch": 4.87,
"learning_rate": 9.090613775648571e-08,
"loss": 3.0479,
"step": 22550
},
{
"epoch": 4.87,
"learning_rate": 8.79517737515545e-08,
"loss": 3.0182,
"step": 22560
},
{
"epoch": 4.87,
"learning_rate": 8.504612851096272e-08,
"loss": 3.0387,
"step": 22570
},
{
"epoch": 4.87,
"learning_rate": 8.218920771708439e-08,
"loss": 3.0355,
"step": 22580
},
{
"epoch": 4.88,
"learning_rate": 7.938101695699473e-08,
"loss": 3.0373,
"step": 22590
},
{
"epoch": 4.88,
"learning_rate": 7.662156172248136e-08,
"loss": 3.0462,
"step": 22600
},
{
"epoch": 4.88,
"learning_rate": 7.39108474100192e-08,
"loss": 3.0091,
"step": 22610
},
{
"epoch": 4.88,
"learning_rate": 7.124887932076496e-08,
"loss": 3.0797,
"step": 22620
},
{
"epoch": 4.89,
"learning_rate": 6.863566266054333e-08,
"loss": 3.0681,
"step": 22630
},
{
"epoch": 4.89,
"learning_rate": 6.607120253984134e-08,
"loss": 3.076,
"step": 22640
},
{
"epoch": 4.89,
"learning_rate": 6.355550397379451e-08,
"loss": 3.0132,
"step": 22650
},
{
"epoch": 4.89,
"learning_rate": 6.108857188218131e-08,
"loss": 3.0115,
"step": 22660
},
{
"epoch": 4.89,
"learning_rate": 5.867041108941207e-08,
"loss": 3.0621,
"step": 22670
},
{
"epoch": 4.9,
"learning_rate": 5.630102632451228e-08,
"loss": 3.031,
"step": 22680
},
{
"epoch": 4.9,
"learning_rate": 5.398042222112265e-08,
"loss": 3.022,
"step": 22690
},
{
"epoch": 4.9,
"learning_rate": 5.1708603317490725e-08,
"loss": 3.0581,
"step": 22700
},
{
"epoch": 4.9,
"learning_rate": 4.948557405645426e-08,
"loss": 3.0281,
"step": 22710
},
{
"epoch": 4.91,
"learning_rate": 4.731133878543847e-08,
"loss": 3.047,
"step": 22720
},
{
"epoch": 4.91,
"learning_rate": 4.518590175644211e-08,
"loss": 3.035,
"step": 22730
},
{
"epoch": 4.91,
"learning_rate": 4.310926712603469e-08,
"loss": 3.0415,
"step": 22740
},
{
"epoch": 4.91,
"learning_rate": 4.1081438955348215e-08,
"loss": 3.0313,
"step": 22750
},
{
"epoch": 4.91,
"learning_rate": 3.910242121006602e-08,
"loss": 3.0021,
"step": 22760
},
{
"epoch": 4.92,
"learning_rate": 3.717221776041446e-08,
"loss": 3.0509,
"step": 22770
},
{
"epoch": 4.92,
"learning_rate": 3.5290832381160155e-08,
"loss": 3.0877,
"step": 22780
},
{
"epoch": 4.92,
"learning_rate": 3.3458268751593305e-08,
"loss": 3.047,
"step": 22790
},
{
"epoch": 4.92,
"learning_rate": 3.167453045553603e-08,
"loss": 3.0126,
"step": 22800
},
{
"epoch": 4.92,
"learning_rate": 2.993962098132297e-08,
"loss": 3.0296,
"step": 22810
},
{
"epoch": 4.93,
"learning_rate": 2.8253543721790122e-08,
"loss": 3.0058,
"step": 22820
},
{
"epoch": 4.93,
"learning_rate": 2.6616301974285994e-08,
"loss": 3.0456,
"step": 22830
},
{
"epoch": 4.93,
"learning_rate": 2.5027898940649386e-08,
"loss": 3.0723,
"step": 22840
},
{
"epoch": 4.93,
"learning_rate": 2.348833772721215e-08,
"loss": 3.0347,
"step": 22850
},
{
"epoch": 4.94,
"learning_rate": 2.1997621344785335e-08,
"loss": 3.0497,
"step": 22860
},
{
"epoch": 4.94,
"learning_rate": 2.055575270865917e-08,
"loss": 3.028,
"step": 22870
},
{
"epoch": 4.94,
"learning_rate": 1.9162734638597524e-08,
"loss": 3.0774,
"step": 22880
},
{
"epoch": 4.94,
"learning_rate": 1.7818569858826793e-08,
"loss": 3.0382,
"step": 22890
},
{
"epoch": 4.94,
"learning_rate": 1.6523260998041467e-08,
"loss": 3.074,
"step": 22900
},
{
"epoch": 4.95,
"learning_rate": 1.5276810589381907e-08,
"loss": 3.0258,
"step": 22910
},
{
"epoch": 4.95,
"learning_rate": 1.4079221070448234e-08,
"loss": 2.9724,
"step": 22920
},
{
"epoch": 4.95,
"learning_rate": 1.29304947832809e-08,
"loss": 3.0074,
"step": 22930
},
{
"epoch": 4.95,
"learning_rate": 1.1830633974363459e-08,
"loss": 3.0574,
"step": 22940
},
{
"epoch": 4.95,
"learning_rate": 1.077964079462257e-08,
"loss": 3.0382,
"step": 22950
},
{
"epoch": 4.96,
"learning_rate": 9.777517299408567e-09,
"loss": 3.0286,
"step": 22960
},
{
"epoch": 4.96,
"learning_rate": 8.824265448503789e-09,
"loss": 2.9604,
"step": 22970
},
{
"epoch": 4.96,
"learning_rate": 7.919887106119794e-09,
"loss": 3.0551,
"step": 22980
},
{
"epoch": 4.96,
"learning_rate": 7.064384040886274e-09,
"loss": 2.9933,
"step": 22990
},
{
"epoch": 4.97,
"learning_rate": 6.257757925848262e-09,
"loss": 2.993,
"step": 23000
},
{
"epoch": 4.97,
"learning_rate": 5.500010338471695e-09,
"loss": 3.0509,
"step": 23010
},
{
"epoch": 4.97,
"learning_rate": 4.79114276062953e-09,
"loss": 3.0074,
"step": 23020
},
{
"epoch": 4.97,
"learning_rate": 4.131156578604523e-09,
"loss": 3.0275,
"step": 23030
},
{
"epoch": 4.97,
"learning_rate": 3.5200530830864496e-09,
"loss": 3.021,
"step": 23040
},
{
"epoch": 4.98,
"learning_rate": 2.95783346916656e-09,
"loss": 3.031,
"step": 23050
},
{
"epoch": 4.98,
"learning_rate": 2.4444988363431232e-09,
"loss": 3.0166,
"step": 23060
},
{
"epoch": 4.98,
"learning_rate": 1.9800501885047782e-09,
"loss": 3.0235,
"step": 23070
},
{
"epoch": 4.98,
"learning_rate": 1.564488433944411e-09,
"loss": 3.0617,
"step": 23080
},
{
"epoch": 4.98,
"learning_rate": 1.1978143853424995e-09,
"loss": 3.024,
"step": 23090
},
{
"epoch": 4.99,
"learning_rate": 8.800287597837686e-10,
"loss": 3.0316,
"step": 23100
},
{
"epoch": 4.99,
"learning_rate": 6.111321787377611e-10,
"loss": 3.0242,
"step": 23110
},
{
"epoch": 4.99,
"learning_rate": 3.911251680643879e-10,
"loss": 3.0317,
"step": 23120
},
{
"epoch": 4.99,
"learning_rate": 2.200081580167046e-10,
"loss": 3.0298,
"step": 23130
},
{
"epoch": 5.0,
"learning_rate": 9.778148324091074e-11,
"loss": 3.084,
"step": 23140
},
{
"epoch": 5.0,
"learning_rate": 2.4445382762472435e-11,
"loss": 3.051,
"step": 23150
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 2.9821,
"step": 23160
},
{
"epoch": 5.0,
"eval_loss": 3.0402331352233887,
"eval_runtime": 191.4444,
"eval_samples_per_second": 774.22,
"eval_steps_per_second": 24.195,
"step": 23160
},
{
"epoch": 5.0,
"step": 23160,
"total_flos": 8.495719075924673e+18,
"train_loss": 3.1306352193697333,
"train_runtime": 16128.3584,
"train_samples_per_second": 183.799,
"train_steps_per_second": 1.436
}
],
"max_steps": 23160,
"num_train_epochs": 5,
"total_flos": 8.495719075924673e+18,
"trial_name": null,
"trial_params": null
}