finetuned_models/alpaca2_couplets_5e/trainer_state.json

13962 lines
277 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 23160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.194244604316547e-07,
"loss": 3.8427,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.4388489208633094e-06,
"loss": 3.8225,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 2.1582733812949645e-06,
"loss": 3.7848,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 2.877697841726619e-06,
"loss": 3.848,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 3.5971223021582732e-06,
"loss": 3.7495,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.316546762589929e-06,
"loss": 3.7578,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 5.035971223021583e-06,
"loss": 3.6901,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 5.755395683453238e-06,
"loss": 3.7084,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 6.474820143884892e-06,
"loss": 3.6912,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 7.1942446043165465e-06,
"loss": 3.6671,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 7.913669064748202e-06,
"loss": 3.6075,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 8.633093525179858e-06,
"loss": 3.6624,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 9.352517985611512e-06,
"loss": 3.598,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 1.0071942446043167e-05,
"loss": 3.6283,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 1.0791366906474821e-05,
"loss": 3.5911,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 1.1510791366906475e-05,
"loss": 3.5508,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 1.223021582733813e-05,
"loss": 3.5457,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 1.2949640287769784e-05,
"loss": 3.585,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 1.366906474820144e-05,
"loss": 3.6085,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 1.4388489208633093e-05,
"loss": 3.5565,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 1.5107913669064749e-05,
"loss": 3.5523,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 1.5827338129496403e-05,
"loss": 3.4997,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 1.6546762589928058e-05,
"loss": 3.5455,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 1.7194244604316546e-05,
"loss": 3.5213,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 1.784172661870504e-05,
"loss": 3.4773,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 1.856115107913669e-05,
"loss": 3.4821,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 1.9280575539568347e-05,
"loss": 3.4343,
"step": 270
},
{
"epoch": 0.06,
"learning_rate": 2e-05,
"loss": 3.5271,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 2.0719424460431656e-05,
"loss": 3.5013,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 2.143884892086331e-05,
"loss": 3.4658,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 2.2158273381294965e-05,
"loss": 3.5091,
"step": 310
},
{
"epoch": 0.07,
"learning_rate": 2.287769784172662e-05,
"loss": 3.4952,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 2.3597122302158277e-05,
"loss": 3.5359,
"step": 330
},
{
"epoch": 0.07,
"learning_rate": 2.4316546762589928e-05,
"loss": 3.431,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 2.489208633093525e-05,
"loss": 3.4908,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 2.5611510791366905e-05,
"loss": 3.4977,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 2.633093525179856e-05,
"loss": 3.4874,
"step": 370
},
{
"epoch": 0.08,
"learning_rate": 2.7050359712230217e-05,
"loss": 3.4772,
"step": 380
},
{
"epoch": 0.08,
"learning_rate": 2.7769784172661872e-05,
"loss": 3.4266,
"step": 390
},
{
"epoch": 0.09,
"learning_rate": 2.848920863309353e-05,
"loss": 3.4698,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 2.9208633093525184e-05,
"loss": 3.4771,
"step": 410
},
{
"epoch": 0.09,
"learning_rate": 2.9928057553956835e-05,
"loss": 3.4403,
"step": 420
},
{
"epoch": 0.09,
"learning_rate": 3.064748201438849e-05,
"loss": 3.4628,
"step": 430
},
{
"epoch": 0.09,
"learning_rate": 3.1366906474820144e-05,
"loss": 3.4616,
"step": 440
},
{
"epoch": 0.1,
"learning_rate": 3.20863309352518e-05,
"loss": 3.4341,
"step": 450
},
{
"epoch": 0.1,
"learning_rate": 3.280575539568346e-05,
"loss": 3.4667,
"step": 460
},
{
"epoch": 0.1,
"learning_rate": 3.345323741007194e-05,
"loss": 3.4077,
"step": 470
},
{
"epoch": 0.1,
"learning_rate": 3.41726618705036e-05,
"loss": 3.406,
"step": 480
},
{
"epoch": 0.11,
"learning_rate": 3.482014388489209e-05,
"loss": 3.4147,
"step": 490
},
{
"epoch": 0.11,
"learning_rate": 3.5467625899280576e-05,
"loss": 3.4432,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 3.6187050359712234e-05,
"loss": 3.3991,
"step": 510
},
{
"epoch": 0.11,
"learning_rate": 3.6906474820143885e-05,
"loss": 3.3806,
"step": 520
},
{
"epoch": 0.11,
"learning_rate": 3.762589928057554e-05,
"loss": 3.3739,
"step": 530
},
{
"epoch": 0.12,
"learning_rate": 3.8345323741007194e-05,
"loss": 3.3835,
"step": 540
},
{
"epoch": 0.12,
"learning_rate": 3.906474820143885e-05,
"loss": 3.3658,
"step": 550
},
{
"epoch": 0.12,
"learning_rate": 3.97841726618705e-05,
"loss": 3.4067,
"step": 560
},
{
"epoch": 0.12,
"learning_rate": 4.050359712230216e-05,
"loss": 3.4224,
"step": 570
},
{
"epoch": 0.13,
"learning_rate": 4.122302158273382e-05,
"loss": 3.373,
"step": 580
},
{
"epoch": 0.13,
"learning_rate": 4.194244604316547e-05,
"loss": 3.4039,
"step": 590
},
{
"epoch": 0.13,
"learning_rate": 4.266187050359712e-05,
"loss": 3.4349,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 4.338129496402878e-05,
"loss": 3.3907,
"step": 610
},
{
"epoch": 0.13,
"learning_rate": 4.4100719424460436e-05,
"loss": 3.4346,
"step": 620
},
{
"epoch": 0.14,
"learning_rate": 4.4820143884892093e-05,
"loss": 3.368,
"step": 630
},
{
"epoch": 0.14,
"learning_rate": 4.553956834532374e-05,
"loss": 3.429,
"step": 640
},
{
"epoch": 0.14,
"learning_rate": 4.6258992805755395e-05,
"loss": 3.3539,
"step": 650
},
{
"epoch": 0.14,
"learning_rate": 4.697841726618705e-05,
"loss": 3.3892,
"step": 660
},
{
"epoch": 0.14,
"learning_rate": 4.769784172661871e-05,
"loss": 3.3811,
"step": 670
},
{
"epoch": 0.15,
"learning_rate": 4.841726618705036e-05,
"loss": 3.3882,
"step": 680
},
{
"epoch": 0.15,
"learning_rate": 4.913669064748201e-05,
"loss": 3.3893,
"step": 690
},
{
"epoch": 0.15,
"learning_rate": 4.985611510791367e-05,
"loss": 3.3458,
"step": 700
},
{
"epoch": 0.15,
"learning_rate": 4.999998802176146e-05,
"loss": 3.3471,
"step": 710
},
{
"epoch": 0.16,
"learning_rate": 4.999993741983604e-05,
"loss": 3.3731,
"step": 720
},
{
"epoch": 0.16,
"learning_rate": 4.999983474936766e-05,
"loss": 3.4107,
"step": 730
},
{
"epoch": 0.16,
"learning_rate": 4.999968318845691e-05,
"loss": 3.3364,
"step": 740
},
{
"epoch": 0.16,
"learning_rate": 4.999948273740022e-05,
"loss": 3.3904,
"step": 750
},
{
"epoch": 0.16,
"learning_rate": 4.9999233396589565e-05,
"loss": 3.3996,
"step": 760
},
{
"epoch": 0.17,
"learning_rate": 4.999893516651259e-05,
"loss": 3.3909,
"step": 770
},
{
"epoch": 0.17,
"learning_rate": 4.9998624959600656e-05,
"loss": 3.3651,
"step": 780
},
{
"epoch": 0.17,
"learning_rate": 4.999823384160347e-05,
"loss": 3.3359,
"step": 790
},
{
"epoch": 0.17,
"learning_rate": 4.999779383629471e-05,
"loss": 3.3575,
"step": 800
},
{
"epoch": 0.17,
"learning_rate": 4.999730494453487e-05,
"loss": 3.3262,
"step": 810
},
{
"epoch": 0.18,
"learning_rate": 4.9996767167280034e-05,
"loss": 3.3824,
"step": 820
},
{
"epoch": 0.18,
"learning_rate": 4.9996180505581904e-05,
"loss": 3.3735,
"step": 830
},
{
"epoch": 0.18,
"learning_rate": 4.999554496058777e-05,
"loss": 3.3515,
"step": 840
},
{
"epoch": 0.18,
"learning_rate": 4.9994860533540526e-05,
"loss": 3.3682,
"step": 850
},
{
"epoch": 0.19,
"learning_rate": 4.999412722577866e-05,
"loss": 3.2884,
"step": 860
},
{
"epoch": 0.19,
"learning_rate": 4.999334503873624e-05,
"loss": 3.4059,
"step": 870
},
{
"epoch": 0.19,
"learning_rate": 4.9992513973942954e-05,
"loss": 3.3279,
"step": 880
},
{
"epoch": 0.19,
"learning_rate": 4.9991724226493286e-05,
"loss": 3.3232,
"step": 890
},
{
"epoch": 0.19,
"learning_rate": 4.999080029852991e-05,
"loss": 3.3733,
"step": 900
},
{
"epoch": 0.2,
"learning_rate": 4.998982749779222e-05,
"loss": 3.3456,
"step": 910
},
{
"epoch": 0.2,
"learning_rate": 4.998880582618267e-05,
"loss": 3.3176,
"step": 920
},
{
"epoch": 0.2,
"learning_rate": 4.998773528569926e-05,
"loss": 3.3821,
"step": 930
},
{
"epoch": 0.2,
"learning_rate": 4.9986615878435584e-05,
"loss": 3.2929,
"step": 940
},
{
"epoch": 0.21,
"learning_rate": 4.9985447606580784e-05,
"loss": 3.344,
"step": 950
},
{
"epoch": 0.21,
"learning_rate": 4.998423047241956e-05,
"loss": 3.3026,
"step": 960
},
{
"epoch": 0.21,
"learning_rate": 4.9982964478332184e-05,
"loss": 3.3415,
"step": 970
},
{
"epoch": 0.21,
"learning_rate": 4.998164962679448e-05,
"loss": 3.3318,
"step": 980
},
{
"epoch": 0.21,
"learning_rate": 4.99802859203778e-05,
"loss": 3.2916,
"step": 990
},
{
"epoch": 0.22,
"learning_rate": 4.9979159781520545e-05,
"loss": 3.3016,
"step": 1000
},
{
"epoch": 0.22,
"learning_rate": 4.99778555051182e-05,
"loss": 3.3575,
"step": 1010
},
{
"epoch": 0.22,
"learning_rate": 4.997635990411991e-05,
"loss": 3.3629,
"step": 1020
},
{
"epoch": 0.22,
"learning_rate": 4.9974815458587394e-05,
"loss": 3.2796,
"step": 1030
},
{
"epoch": 0.22,
"learning_rate": 4.997322217154101e-05,
"loss": 3.3277,
"step": 1040
},
{
"epoch": 0.23,
"learning_rate": 4.9971580046096644e-05,
"loss": 3.3207,
"step": 1050
},
{
"epoch": 0.23,
"learning_rate": 4.9969889085465686e-05,
"loss": 3.2852,
"step": 1060
},
{
"epoch": 0.23,
"learning_rate": 4.996814929295503e-05,
"loss": 3.3225,
"step": 1070
},
{
"epoch": 0.23,
"learning_rate": 4.9966360671967074e-05,
"loss": 3.3052,
"step": 1080
},
{
"epoch": 0.24,
"learning_rate": 4.9964523225999695e-05,
"loss": 3.268,
"step": 1090
},
{
"epoch": 0.24,
"learning_rate": 4.9962636958646255e-05,
"loss": 3.2969,
"step": 1100
},
{
"epoch": 0.24,
"learning_rate": 4.99607018735956e-05,
"loss": 3.2677,
"step": 1110
},
{
"epoch": 0.24,
"learning_rate": 4.995911865935293e-05,
"loss": 3.3533,
"step": 1120
},
{
"epoch": 0.24,
"learning_rate": 4.995709571204791e-05,
"loss": 3.315,
"step": 1130
},
{
"epoch": 0.25,
"learning_rate": 4.9955023957882294e-05,
"loss": 3.3084,
"step": 1140
},
{
"epoch": 0.25,
"learning_rate": 4.995290340090768e-05,
"loss": 3.2754,
"step": 1150
},
{
"epoch": 0.25,
"learning_rate": 4.995073404527111e-05,
"loss": 3.3025,
"step": 1160
},
{
"epoch": 0.25,
"learning_rate": 4.9948515895215006e-05,
"loss": 3.3049,
"step": 1170
},
{
"epoch": 0.25,
"learning_rate": 4.994624895507728e-05,
"loss": 3.3142,
"step": 1180
},
{
"epoch": 0.26,
"learning_rate": 4.994393322929123e-05,
"loss": 3.2972,
"step": 1190
},
{
"epoch": 0.26,
"learning_rate": 4.994156872238553e-05,
"loss": 3.2712,
"step": 1200
},
{
"epoch": 0.26,
"learning_rate": 4.993915543898432e-05,
"loss": 3.3262,
"step": 1210
},
{
"epoch": 0.26,
"learning_rate": 4.993694178391824e-05,
"loss": 3.2932,
"step": 1220
},
{
"epoch": 0.27,
"learning_rate": 4.993443583825655e-05,
"loss": 3.3004,
"step": 1230
},
{
"epoch": 0.27,
"learning_rate": 4.993188113004862e-05,
"loss": 3.2921,
"step": 1240
},
{
"epoch": 0.27,
"learning_rate": 4.9929277664290515e-05,
"loss": 3.29,
"step": 1250
},
{
"epoch": 0.27,
"learning_rate": 4.992662544607365e-05,
"loss": 3.2609,
"step": 1260
},
{
"epoch": 0.27,
"learning_rate": 4.992392448058478e-05,
"loss": 3.2887,
"step": 1270
},
{
"epoch": 0.28,
"learning_rate": 4.9921174773106014e-05,
"loss": 3.2751,
"step": 1280
},
{
"epoch": 0.28,
"learning_rate": 4.991865836641621e-05,
"loss": 3.3133,
"step": 1290
},
{
"epoch": 0.28,
"learning_rate": 4.991581606405016e-05,
"loss": 3.3337,
"step": 1300
},
{
"epoch": 0.28,
"learning_rate": 4.991292503555127e-05,
"loss": 3.2762,
"step": 1310
},
{
"epoch": 0.28,
"learning_rate": 4.9909985286573324e-05,
"loss": 3.2582,
"step": 1320
},
{
"epoch": 0.29,
"learning_rate": 4.990699682286538e-05,
"loss": 3.2461,
"step": 1330
},
{
"epoch": 0.29,
"learning_rate": 4.9903959650271784e-05,
"loss": 3.2672,
"step": 1340
},
{
"epoch": 0.29,
"learning_rate": 4.990087377473211e-05,
"loss": 3.2696,
"step": 1350
},
{
"epoch": 0.29,
"learning_rate": 4.989773920228119e-05,
"loss": 3.3005,
"step": 1360
},
{
"epoch": 0.3,
"learning_rate": 4.989455593904909e-05,
"loss": 3.271,
"step": 1370
},
{
"epoch": 0.3,
"learning_rate": 4.989132399126111e-05,
"loss": 3.334,
"step": 1380
},
{
"epoch": 0.3,
"learning_rate": 4.988804336523773e-05,
"loss": 3.3045,
"step": 1390
},
{
"epoch": 0.3,
"learning_rate": 4.9884714067394656e-05,
"loss": 3.2555,
"step": 1400
},
{
"epoch": 0.3,
"learning_rate": 4.988133610424276e-05,
"loss": 3.3186,
"step": 1410
},
{
"epoch": 0.31,
"learning_rate": 4.987825433402493e-05,
"loss": 3.2602,
"step": 1420
},
{
"epoch": 0.31,
"learning_rate": 4.987478392506458e-05,
"loss": 3.2959,
"step": 1430
},
{
"epoch": 0.31,
"learning_rate": 4.9871618964569666e-05,
"loss": 3.2765,
"step": 1440
},
{
"epoch": 0.31,
"learning_rate": 4.9868056134301333e-05,
"loss": 3.2889,
"step": 1450
},
{
"epoch": 0.32,
"learning_rate": 4.986444467130095e-05,
"loss": 3.2631,
"step": 1460
},
{
"epoch": 0.32,
"learning_rate": 4.9860784582631184e-05,
"loss": 3.2772,
"step": 1470
},
{
"epoch": 0.32,
"learning_rate": 4.985707587544983e-05,
"loss": 3.2595,
"step": 1480
},
{
"epoch": 0.32,
"learning_rate": 4.9853318557009756e-05,
"loss": 3.2793,
"step": 1490
},
{
"epoch": 0.32,
"learning_rate": 4.984951263465887e-05,
"loss": 3.3642,
"step": 1500
},
{
"epoch": 0.33,
"learning_rate": 4.9845658115840166e-05,
"loss": 3.2765,
"step": 1510
},
{
"epoch": 0.33,
"learning_rate": 4.984175500809165e-05,
"loss": 3.2853,
"step": 1520
},
{
"epoch": 0.33,
"learning_rate": 4.9837803319046365e-05,
"loss": 3.2689,
"step": 1530
},
{
"epoch": 0.33,
"learning_rate": 4.983380305643235e-05,
"loss": 3.2735,
"step": 1540
},
{
"epoch": 0.33,
"learning_rate": 4.982975422807265e-05,
"loss": 3.2772,
"step": 1550
},
{
"epoch": 0.34,
"learning_rate": 4.9826068765378625e-05,
"loss": 3.2604,
"step": 1560
},
{
"epoch": 0.34,
"learning_rate": 4.9821927683994716e-05,
"loss": 3.2121,
"step": 1570
},
{
"epoch": 0.34,
"learning_rate": 4.981773806008896e-05,
"loss": 3.2595,
"step": 1580
},
{
"epoch": 0.34,
"learning_rate": 4.981349990185471e-05,
"loss": 3.2781,
"step": 1590
},
{
"epoch": 0.35,
"learning_rate": 4.980921321758024e-05,
"loss": 3.2498,
"step": 1600
},
{
"epoch": 0.35,
"learning_rate": 4.980531371889556e-05,
"loss": 3.2336,
"step": 1610
},
{
"epoch": 0.35,
"learning_rate": 4.980093485831874e-05,
"loss": 3.2493,
"step": 1620
},
{
"epoch": 0.35,
"learning_rate": 4.979650749627427e-05,
"loss": 3.2805,
"step": 1630
},
{
"epoch": 0.35,
"learning_rate": 4.979203164142045e-05,
"loss": 3.2833,
"step": 1640
},
{
"epoch": 0.36,
"learning_rate": 4.9787507302510393e-05,
"loss": 3.2572,
"step": 1650
},
{
"epoch": 0.36,
"learning_rate": 4.978293448839205e-05,
"loss": 3.2687,
"step": 1660
},
{
"epoch": 0.36,
"learning_rate": 4.977831320800814e-05,
"loss": 3.2722,
"step": 1670
},
{
"epoch": 0.36,
"learning_rate": 4.977364347039618e-05,
"loss": 3.2716,
"step": 1680
},
{
"epoch": 0.36,
"learning_rate": 4.9768925284688464e-05,
"loss": 3.2558,
"step": 1690
},
{
"epoch": 0.37,
"learning_rate": 4.976415866011201e-05,
"loss": 3.2528,
"step": 1700
},
{
"epoch": 0.37,
"learning_rate": 4.975934360598858e-05,
"loss": 3.2425,
"step": 1710
},
{
"epoch": 0.37,
"learning_rate": 4.9754480131734635e-05,
"loss": 3.2225,
"step": 1720
},
{
"epoch": 0.37,
"learning_rate": 4.9749568246861343e-05,
"loss": 3.2227,
"step": 1730
},
{
"epoch": 0.38,
"learning_rate": 4.974460796097454e-05,
"loss": 3.2689,
"step": 1740
},
{
"epoch": 0.38,
"learning_rate": 4.9740604890050905e-05,
"loss": 3.2438,
"step": 1750
},
{
"epoch": 0.38,
"learning_rate": 4.9735557506848586e-05,
"loss": 3.2491,
"step": 1760
},
{
"epoch": 0.38,
"learning_rate": 4.973046175003258e-05,
"loss": 3.2365,
"step": 1770
},
{
"epoch": 0.38,
"learning_rate": 4.972531762956831e-05,
"loss": 3.2286,
"step": 1780
},
{
"epoch": 0.39,
"learning_rate": 4.9720125155515764e-05,
"loss": 3.2696,
"step": 1790
},
{
"epoch": 0.39,
"learning_rate": 4.9714884338029514e-05,
"loss": 3.2397,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 4.970959518735866e-05,
"loss": 3.2447,
"step": 1810
},
{
"epoch": 0.39,
"learning_rate": 4.970425771384683e-05,
"loss": 3.2159,
"step": 1820
},
{
"epoch": 0.4,
"learning_rate": 4.969887192793214e-05,
"loss": 3.2682,
"step": 1830
},
{
"epoch": 0.4,
"learning_rate": 4.9693437840147215e-05,
"loss": 3.2,
"step": 1840
},
{
"epoch": 0.4,
"learning_rate": 4.9687955461119105e-05,
"loss": 3.2219,
"step": 1850
},
{
"epoch": 0.4,
"learning_rate": 4.968298003984028e-05,
"loss": 3.2433,
"step": 1860
},
{
"epoch": 0.4,
"learning_rate": 4.967740593706592e-05,
"loss": 3.2454,
"step": 1870
},
{
"epoch": 0.41,
"learning_rate": 4.967234798204984e-05,
"loss": 3.2614,
"step": 1880
},
{
"epoch": 0.41,
"learning_rate": 4.966668219488144e-05,
"loss": 3.1883,
"step": 1890
},
{
"epoch": 0.41,
"learning_rate": 4.966096816879401e-05,
"loss": 3.2841,
"step": 1900
},
{
"epoch": 0.41,
"learning_rate": 4.9655205914962095e-05,
"loss": 3.2539,
"step": 1910
},
{
"epoch": 0.41,
"learning_rate": 4.9649395444654525e-05,
"loss": 3.2266,
"step": 1920
},
{
"epoch": 0.42,
"learning_rate": 4.964353676923443e-05,
"loss": 3.2136,
"step": 1930
},
{
"epoch": 0.42,
"learning_rate": 4.963762990015923e-05,
"loss": 3.2223,
"step": 1940
},
{
"epoch": 0.42,
"learning_rate": 4.963167484898057e-05,
"loss": 3.2384,
"step": 1950
},
{
"epoch": 0.42,
"learning_rate": 4.962567162734433e-05,
"loss": 3.1826,
"step": 1960
},
{
"epoch": 0.43,
"learning_rate": 4.9619620246990596e-05,
"loss": 3.1996,
"step": 1970
},
{
"epoch": 0.43,
"learning_rate": 4.9613520719753626e-05,
"loss": 3.2432,
"step": 1980
},
{
"epoch": 0.43,
"learning_rate": 4.960737305756185e-05,
"loss": 3.2436,
"step": 1990
},
{
"epoch": 0.43,
"learning_rate": 4.960242027871696e-05,
"loss": 3.2052,
"step": 2000
},
{
"epoch": 0.43,
"learning_rate": 4.9596186003966614e-05,
"loss": 3.2187,
"step": 2010
},
{
"epoch": 0.44,
"learning_rate": 4.958990362816176e-05,
"loss": 3.2541,
"step": 2020
},
{
"epoch": 0.44,
"learning_rate": 4.9583573163588405e-05,
"loss": 3.1813,
"step": 2030
},
{
"epoch": 0.44,
"learning_rate": 4.95771946226266e-05,
"loss": 3.2302,
"step": 2040
},
{
"epoch": 0.44,
"learning_rate": 4.957076801775041e-05,
"loss": 3.2261,
"step": 2050
},
{
"epoch": 0.44,
"learning_rate": 4.956494298910062e-05,
"loss": 3.269,
"step": 2060
},
{
"epoch": 0.45,
"learning_rate": 4.955842509748979e-05,
"loss": 3.2446,
"step": 2070
},
{
"epoch": 0.45,
"learning_rate": 4.955185917867085e-05,
"loss": 3.2608,
"step": 2080
},
{
"epoch": 0.45,
"learning_rate": 4.9545245245484295e-05,
"loss": 3.1839,
"step": 2090
},
{
"epoch": 0.45,
"learning_rate": 4.953858331086454e-05,
"loss": 3.2747,
"step": 2100
},
{
"epoch": 0.46,
"learning_rate": 4.9531873387839865e-05,
"loss": 3.2067,
"step": 2110
},
{
"epoch": 0.46,
"learning_rate": 4.952511548953241e-05,
"loss": 3.2304,
"step": 2120
},
{
"epoch": 0.46,
"learning_rate": 4.9518309629158124e-05,
"loss": 3.2081,
"step": 2130
},
{
"epoch": 0.46,
"learning_rate": 4.9511455820026744e-05,
"loss": 3.2018,
"step": 2140
},
{
"epoch": 0.46,
"learning_rate": 4.950455407554181e-05,
"loss": 3.2309,
"step": 2150
},
{
"epoch": 0.47,
"learning_rate": 4.949760440920056e-05,
"loss": 3.1876,
"step": 2160
},
{
"epoch": 0.47,
"learning_rate": 4.9490606834594e-05,
"loss": 3.204,
"step": 2170
},
{
"epoch": 0.47,
"learning_rate": 4.948356136540678e-05,
"loss": 3.1918,
"step": 2180
},
{
"epoch": 0.47,
"learning_rate": 4.947646801541725e-05,
"loss": 3.271,
"step": 2190
},
{
"epoch": 0.47,
"learning_rate": 4.946932679849738e-05,
"loss": 3.2269,
"step": 2200
},
{
"epoch": 0.48,
"learning_rate": 4.9462137728612744e-05,
"loss": 3.2094,
"step": 2210
},
{
"epoch": 0.48,
"learning_rate": 4.9454900819822515e-05,
"loss": 3.1822,
"step": 2220
},
{
"epoch": 0.48,
"learning_rate": 4.944761608627941e-05,
"loss": 3.259,
"step": 2230
},
{
"epoch": 0.48,
"learning_rate": 4.9440283542229674e-05,
"loss": 3.2352,
"step": 2240
},
{
"epoch": 0.49,
"learning_rate": 4.943290320201306e-05,
"loss": 3.2428,
"step": 2250
},
{
"epoch": 0.49,
"learning_rate": 4.94269645262956e-05,
"loss": 3.2326,
"step": 2260
},
{
"epoch": 0.49,
"learning_rate": 4.941949818941311e-05,
"loss": 3.2805,
"step": 2270
},
{
"epoch": 0.49,
"learning_rate": 4.9412737654832576e-05,
"loss": 3.1829,
"step": 2280
},
{
"epoch": 0.49,
"learning_rate": 4.9405180595026494e-05,
"loss": 3.2018,
"step": 2290
},
{
"epoch": 0.5,
"learning_rate": 4.9397575807701936e-05,
"loss": 3.2755,
"step": 2300
},
{
"epoch": 0.5,
"learning_rate": 4.938992330773106e-05,
"loss": 3.1976,
"step": 2310
},
{
"epoch": 0.5,
"learning_rate": 4.93822231100793e-05,
"loss": 3.1758,
"step": 2320
},
{
"epoch": 0.5,
"learning_rate": 4.937447522980544e-05,
"loss": 3.1989,
"step": 2330
},
{
"epoch": 0.51,
"learning_rate": 4.936667968206145e-05,
"loss": 3.2362,
"step": 2340
},
{
"epoch": 0.51,
"learning_rate": 4.935883648209253e-05,
"loss": 3.1525,
"step": 2350
},
{
"epoch": 0.51,
"learning_rate": 4.935094564523711e-05,
"loss": 3.2014,
"step": 2360
},
{
"epoch": 0.51,
"learning_rate": 4.934300718692673e-05,
"loss": 3.23,
"step": 2370
},
{
"epoch": 0.51,
"learning_rate": 4.9335021122686094e-05,
"loss": 3.2516,
"step": 2380
},
{
"epoch": 0.52,
"learning_rate": 4.932779297470532e-05,
"loss": 3.2242,
"step": 2390
},
{
"epoch": 0.52,
"learning_rate": 4.9319716502301134e-05,
"loss": 3.2104,
"step": 2400
},
{
"epoch": 0.52,
"learning_rate": 4.9311592469514654e-05,
"loss": 3.1586,
"step": 2410
},
{
"epoch": 0.52,
"learning_rate": 4.93034208922335e-05,
"loss": 3.2365,
"step": 2420
},
{
"epoch": 0.52,
"learning_rate": 4.929520178643825e-05,
"loss": 3.2238,
"step": 2430
},
{
"epoch": 0.53,
"learning_rate": 4.928776396762584e-05,
"loss": 3.1967,
"step": 2440
},
{
"epoch": 0.53,
"learning_rate": 4.9279454602013206e-05,
"loss": 3.2085,
"step": 2450
},
{
"epoch": 0.53,
"learning_rate": 4.9271097754755696e-05,
"loss": 3.245,
"step": 2460
},
{
"epoch": 0.53,
"learning_rate": 4.926269344219621e-05,
"loss": 3.2504,
"step": 2470
},
{
"epoch": 0.54,
"learning_rate": 4.925424168077048e-05,
"loss": 3.2111,
"step": 2480
},
{
"epoch": 0.54,
"learning_rate": 4.924574248700704e-05,
"loss": 3.1958,
"step": 2490
},
{
"epoch": 0.54,
"learning_rate": 4.9237195877527167e-05,
"loss": 3.2036,
"step": 2500
},
{
"epoch": 0.54,
"learning_rate": 4.922860186904486e-05,
"loss": 3.1771,
"step": 2510
},
{
"epoch": 0.54,
"learning_rate": 4.921996047836684e-05,
"loss": 3.1647,
"step": 2520
},
{
"epoch": 0.55,
"learning_rate": 4.921127172239247e-05,
"loss": 3.2035,
"step": 2530
},
{
"epoch": 0.55,
"learning_rate": 4.920253561811375e-05,
"loss": 3.2114,
"step": 2540
},
{
"epoch": 0.55,
"learning_rate": 4.9193752182615274e-05,
"loss": 3.1911,
"step": 2550
},
{
"epoch": 0.55,
"learning_rate": 4.9184921433074185e-05,
"loss": 3.2062,
"step": 2560
},
{
"epoch": 0.55,
"learning_rate": 4.9176043386760186e-05,
"loss": 3.2394,
"step": 2570
},
{
"epoch": 0.56,
"learning_rate": 4.916711806103545e-05,
"loss": 3.1974,
"step": 2580
},
{
"epoch": 0.56,
"learning_rate": 4.91581454733546e-05,
"loss": 3.1594,
"step": 2590
},
{
"epoch": 0.56,
"learning_rate": 4.9149125641264726e-05,
"loss": 3.2162,
"step": 2600
},
{
"epoch": 0.56,
"learning_rate": 4.914005858240528e-05,
"loss": 3.1592,
"step": 2610
},
{
"epoch": 0.57,
"learning_rate": 4.913185786519723e-05,
"loss": 3.1943,
"step": 2620
},
{
"epoch": 0.57,
"learning_rate": 4.912270112440306e-05,
"loss": 3.2029,
"step": 2630
},
{
"epoch": 0.57,
"learning_rate": 4.911349720851591e-05,
"loss": 3.2339,
"step": 2640
},
{
"epoch": 0.57,
"learning_rate": 4.910424613553524e-05,
"loss": 3.1677,
"step": 2650
},
{
"epoch": 0.57,
"learning_rate": 4.9094947923552737e-05,
"loss": 3.1727,
"step": 2660
},
{
"epoch": 0.58,
"learning_rate": 4.908560259075227e-05,
"loss": 3.173,
"step": 2670
},
{
"epoch": 0.58,
"learning_rate": 4.907621015540984e-05,
"loss": 3.2398,
"step": 2680
},
{
"epoch": 0.58,
"learning_rate": 4.90667706358936e-05,
"loss": 3.1917,
"step": 2690
},
{
"epoch": 0.58,
"learning_rate": 4.9057284050663756e-05,
"loss": 3.1579,
"step": 2700
},
{
"epoch": 0.59,
"learning_rate": 4.9047750418272565e-05,
"loss": 3.2076,
"step": 2710
},
{
"epoch": 0.59,
"learning_rate": 4.9038169757364294e-05,
"loss": 3.2234,
"step": 2720
},
{
"epoch": 0.59,
"learning_rate": 4.9028542086675175e-05,
"loss": 3.1795,
"step": 2730
},
{
"epoch": 0.59,
"learning_rate": 4.9018867425033376e-05,
"loss": 3.2116,
"step": 2740
},
{
"epoch": 0.59,
"learning_rate": 4.900914579135896e-05,
"loss": 3.1667,
"step": 2750
},
{
"epoch": 0.6,
"learning_rate": 4.8999377204663856e-05,
"loss": 3.1789,
"step": 2760
},
{
"epoch": 0.6,
"learning_rate": 4.899054534759291e-05,
"loss": 3.1812,
"step": 2770
},
{
"epoch": 0.6,
"learning_rate": 4.898167548803827e-05,
"loss": 3.197,
"step": 2780
},
{
"epoch": 0.6,
"learning_rate": 4.8971775534810114e-05,
"loss": 3.1801,
"step": 2790
},
{
"epoch": 0.6,
"learning_rate": 4.8961828701643685e-05,
"loss": 3.1764,
"step": 2800
},
{
"epoch": 0.61,
"learning_rate": 4.895183500799131e-05,
"loss": 3.2211,
"step": 2810
},
{
"epoch": 0.61,
"learning_rate": 4.89428006341399e-05,
"loss": 3.2212,
"step": 2820
},
{
"epoch": 0.61,
"learning_rate": 4.893271795948359e-05,
"loss": 3.179,
"step": 2830
},
{
"epoch": 0.61,
"learning_rate": 4.8922588481271204e-05,
"loss": 3.1804,
"step": 2840
},
{
"epoch": 0.62,
"learning_rate": 4.891241221931225e-05,
"loss": 3.1582,
"step": 2850
},
{
"epoch": 0.62,
"learning_rate": 4.8902189193507744e-05,
"loss": 3.1678,
"step": 2860
},
{
"epoch": 0.62,
"learning_rate": 4.889191942385013e-05,
"loss": 3.1826,
"step": 2870
},
{
"epoch": 0.62,
"learning_rate": 4.888160293042331e-05,
"loss": 3.2037,
"step": 2880
},
{
"epoch": 0.62,
"learning_rate": 4.8871239733402516e-05,
"loss": 3.1323,
"step": 2890
},
{
"epoch": 0.63,
"learning_rate": 4.8860829853054346e-05,
"loss": 3.2094,
"step": 2900
},
{
"epoch": 0.63,
"learning_rate": 4.8850373309736674e-05,
"loss": 3.1949,
"step": 2910
},
{
"epoch": 0.63,
"learning_rate": 4.883987012389864e-05,
"loss": 3.1769,
"step": 2920
},
{
"epoch": 0.63,
"learning_rate": 4.882932031608058e-05,
"loss": 3.203,
"step": 2930
},
{
"epoch": 0.63,
"learning_rate": 4.881872390691405e-05,
"loss": 3.2164,
"step": 2940
},
{
"epoch": 0.64,
"learning_rate": 4.880808091712168e-05,
"loss": 3.1869,
"step": 2950
},
{
"epoch": 0.64,
"learning_rate": 4.879739136751725e-05,
"loss": 3.1936,
"step": 2960
},
{
"epoch": 0.64,
"learning_rate": 4.878665527900557e-05,
"loss": 3.1789,
"step": 2970
},
{
"epoch": 0.64,
"learning_rate": 4.8775872672582444e-05,
"loss": 3.1787,
"step": 2980
},
{
"epoch": 0.65,
"learning_rate": 4.876504356933469e-05,
"loss": 3.1452,
"step": 2990
},
{
"epoch": 0.65,
"learning_rate": 4.875416799044003e-05,
"loss": 3.215,
"step": 3000
},
{
"epoch": 0.65,
"learning_rate": 4.874324595716706e-05,
"loss": 3.184,
"step": 3010
},
{
"epoch": 0.65,
"learning_rate": 4.873447489774682e-05,
"loss": 3.1474,
"step": 3020
},
{
"epoch": 0.65,
"learning_rate": 4.8723469300479785e-05,
"loss": 3.1692,
"step": 3030
},
{
"epoch": 0.66,
"learning_rate": 4.871241730886976e-05,
"loss": 3.1671,
"step": 3040
},
{
"epoch": 0.66,
"learning_rate": 4.8701318944530346e-05,
"loss": 3.1815,
"step": 3050
},
{
"epoch": 0.66,
"learning_rate": 4.869017422916584e-05,
"loss": 3.1612,
"step": 3060
},
{
"epoch": 0.66,
"learning_rate": 4.867898318457121e-05,
"loss": 3.184,
"step": 3070
},
{
"epoch": 0.66,
"learning_rate": 4.866774583263198e-05,
"loss": 3.2072,
"step": 3080
},
{
"epoch": 0.67,
"learning_rate": 4.865646219532428e-05,
"loss": 3.138,
"step": 3090
},
{
"epoch": 0.67,
"learning_rate": 4.8645132294714726e-05,
"loss": 3.2044,
"step": 3100
},
{
"epoch": 0.67,
"learning_rate": 4.8633756152960416e-05,
"loss": 3.1735,
"step": 3110
},
{
"epoch": 0.67,
"learning_rate": 4.862233379230889e-05,
"loss": 3.1836,
"step": 3120
},
{
"epoch": 0.68,
"learning_rate": 4.861086523509806e-05,
"loss": 3.1728,
"step": 3130
},
{
"epoch": 0.68,
"learning_rate": 4.8599350503756194e-05,
"loss": 3.2208,
"step": 3140
},
{
"epoch": 0.68,
"learning_rate": 4.8588947785276195e-05,
"loss": 3.1733,
"step": 3150
},
{
"epoch": 0.68,
"learning_rate": 4.857734538519858e-05,
"loss": 3.1522,
"step": 3160
},
{
"epoch": 0.68,
"learning_rate": 4.856569687654237e-05,
"loss": 3.1948,
"step": 3170
},
{
"epoch": 0.69,
"learning_rate": 4.855517381474304e-05,
"loss": 3.1675,
"step": 3180
},
{
"epoch": 0.69,
"learning_rate": 4.8543437762621386e-05,
"loss": 3.1679,
"step": 3190
},
{
"epoch": 0.69,
"learning_rate": 4.853165566823192e-05,
"loss": 3.1516,
"step": 3200
},
{
"epoch": 0.69,
"learning_rate": 4.851982755461607e-05,
"loss": 3.1426,
"step": 3210
},
{
"epoch": 0.7,
"learning_rate": 4.8507953444905264e-05,
"loss": 3.1782,
"step": 3220
},
{
"epoch": 0.7,
"learning_rate": 4.849722743869494e-05,
"loss": 3.1538,
"step": 3230
},
{
"epoch": 0.7,
"learning_rate": 4.848526600045288e-05,
"loss": 3.1662,
"step": 3240
},
{
"epoch": 0.7,
"learning_rate": 4.8473258633705484e-05,
"loss": 3.1515,
"step": 3250
},
{
"epoch": 0.7,
"learning_rate": 4.846120536193475e-05,
"loss": 3.1484,
"step": 3260
},
{
"epoch": 0.71,
"learning_rate": 4.8449106208712394e-05,
"loss": 3.1709,
"step": 3270
},
{
"epoch": 0.71,
"learning_rate": 4.84369611976999e-05,
"loss": 3.1623,
"step": 3280
},
{
"epoch": 0.71,
"learning_rate": 4.842477035264843e-05,
"loss": 3.1562,
"step": 3290
},
{
"epoch": 0.71,
"learning_rate": 4.8412533697398764e-05,
"loss": 3.1588,
"step": 3300
},
{
"epoch": 0.71,
"learning_rate": 4.840025125588129e-05,
"loss": 3.171,
"step": 3310
},
{
"epoch": 0.72,
"learning_rate": 4.8387923052115926e-05,
"loss": 3.1413,
"step": 3320
},
{
"epoch": 0.72,
"learning_rate": 4.8375549110212084e-05,
"loss": 3.1417,
"step": 3330
},
{
"epoch": 0.72,
"learning_rate": 4.8363129454368616e-05,
"loss": 3.2394,
"step": 3340
},
{
"epoch": 0.72,
"learning_rate": 4.8350664108873786e-05,
"loss": 3.1296,
"step": 3350
},
{
"epoch": 0.73,
"learning_rate": 4.83381530981052e-05,
"loss": 3.2056,
"step": 3360
},
{
"epoch": 0.73,
"learning_rate": 4.832559644652979e-05,
"loss": 3.1453,
"step": 3370
},
{
"epoch": 0.73,
"learning_rate": 4.831425645751591e-05,
"loss": 3.1162,
"step": 3380
},
{
"epoch": 0.73,
"learning_rate": 4.8301613156133496e-05,
"loss": 3.1353,
"step": 3390
},
{
"epoch": 0.73,
"learning_rate": 4.828892428540287e-05,
"loss": 3.1632,
"step": 3400
},
{
"epoch": 0.74,
"learning_rate": 4.827618987013879e-05,
"loss": 3.187,
"step": 3410
},
{
"epoch": 0.74,
"learning_rate": 4.826340993524506e-05,
"loss": 3.1333,
"step": 3420
},
{
"epoch": 0.74,
"learning_rate": 4.825058450571453e-05,
"loss": 3.1768,
"step": 3430
},
{
"epoch": 0.74,
"learning_rate": 4.823771360662897e-05,
"loss": 3.1368,
"step": 3440
},
{
"epoch": 0.74,
"learning_rate": 4.8224797263159115e-05,
"loss": 3.1721,
"step": 3450
},
{
"epoch": 0.75,
"learning_rate": 4.821183550056457e-05,
"loss": 3.1955,
"step": 3460
},
{
"epoch": 0.75,
"learning_rate": 4.8200131101826504e-05,
"loss": 3.1425,
"step": 3470
},
{
"epoch": 0.75,
"learning_rate": 4.818708311280332e-05,
"loss": 3.1404,
"step": 3480
},
{
"epoch": 0.75,
"learning_rate": 4.817398977841039e-05,
"loss": 3.1465,
"step": 3490
},
{
"epoch": 0.76,
"learning_rate": 4.816085112425344e-05,
"loss": 3.163,
"step": 3500
},
{
"epoch": 0.76,
"learning_rate": 4.814766717602681e-05,
"loss": 3.1447,
"step": 3510
},
{
"epoch": 0.76,
"learning_rate": 4.813443795951347e-05,
"loss": 3.1679,
"step": 3520
},
{
"epoch": 0.76,
"learning_rate": 4.8121163500584845e-05,
"loss": 3.1549,
"step": 3530
},
{
"epoch": 0.76,
"learning_rate": 4.810784382520088e-05,
"loss": 3.1341,
"step": 3540
},
{
"epoch": 0.77,
"learning_rate": 4.809447895940996e-05,
"loss": 3.1537,
"step": 3550
},
{
"epoch": 0.77,
"learning_rate": 4.808106892934881e-05,
"loss": 3.1142,
"step": 3560
},
{
"epoch": 0.77,
"learning_rate": 4.80676137612425e-05,
"loss": 3.149,
"step": 3570
},
{
"epoch": 0.77,
"learning_rate": 4.805411348140436e-05,
"loss": 3.1652,
"step": 3580
},
{
"epoch": 0.78,
"learning_rate": 4.804056811623596e-05,
"loss": 3.2012,
"step": 3590
},
{
"epoch": 0.78,
"learning_rate": 4.802697769222703e-05,
"loss": 3.1313,
"step": 3600
},
{
"epoch": 0.78,
"learning_rate": 4.801334223595542e-05,
"loss": 3.1533,
"step": 3610
},
{
"epoch": 0.78,
"learning_rate": 4.799966177408703e-05,
"loss": 3.1505,
"step": 3620
},
{
"epoch": 0.78,
"learning_rate": 4.798593633337582e-05,
"loss": 3.1828,
"step": 3630
},
{
"epoch": 0.79,
"learning_rate": 4.797216594066366e-05,
"loss": 3.1421,
"step": 3640
},
{
"epoch": 0.79,
"learning_rate": 4.7958350622880357e-05,
"loss": 3.1798,
"step": 3650
},
{
"epoch": 0.79,
"learning_rate": 4.794449040704356e-05,
"loss": 3.1171,
"step": 3660
},
{
"epoch": 0.79,
"learning_rate": 4.793336992598755e-05,
"loss": 3.1647,
"step": 3670
},
{
"epoch": 0.79,
"learning_rate": 4.791942896201923e-05,
"loss": 3.177,
"step": 3680
},
{
"epoch": 0.8,
"learning_rate": 4.790544317611381e-05,
"loss": 3.1116,
"step": 3690
},
{
"epoch": 0.8,
"learning_rate": 4.789141259562233e-05,
"loss": 3.1357,
"step": 3700
},
{
"epoch": 0.8,
"learning_rate": 4.7877337247983435e-05,
"loss": 3.1441,
"step": 3710
},
{
"epoch": 0.8,
"learning_rate": 4.786321716072328e-05,
"loss": 3.179,
"step": 3720
},
{
"epoch": 0.81,
"learning_rate": 4.785047085263387e-05,
"loss": 3.135,
"step": 3730
},
{
"epoch": 0.81,
"learning_rate": 4.783626583624129e-05,
"loss": 3.1503,
"step": 3740
},
{
"epoch": 0.81,
"learning_rate": 4.782201616054798e-05,
"loss": 3.0998,
"step": 3750
},
{
"epoch": 0.81,
"learning_rate": 4.780772185342103e-05,
"loss": 3.1063,
"step": 3760
},
{
"epoch": 0.81,
"learning_rate": 4.779338294281483e-05,
"loss": 3.1249,
"step": 3770
},
{
"epoch": 0.82,
"learning_rate": 4.777899945677099e-05,
"loss": 3.1378,
"step": 3780
},
{
"epoch": 0.82,
"learning_rate": 4.77645714234183e-05,
"loss": 3.1911,
"step": 3790
},
{
"epoch": 0.82,
"learning_rate": 4.775009887097267e-05,
"loss": 3.1606,
"step": 3800
},
{
"epoch": 0.82,
"learning_rate": 4.7735581827737054e-05,
"loss": 3.1478,
"step": 3810
},
{
"epoch": 0.82,
"learning_rate": 4.772102032210143e-05,
"loss": 3.1734,
"step": 3820
},
{
"epoch": 0.83,
"learning_rate": 4.770641438254272e-05,
"loss": 3.1806,
"step": 3830
},
{
"epoch": 0.83,
"learning_rate": 4.7691764037624754e-05,
"loss": 3.1529,
"step": 3840
},
{
"epoch": 0.83,
"learning_rate": 4.76770693159982e-05,
"loss": 3.1503,
"step": 3850
},
{
"epoch": 0.83,
"learning_rate": 4.766233024640049e-05,
"loss": 3.139,
"step": 3860
},
{
"epoch": 0.84,
"learning_rate": 4.764754685765581e-05,
"loss": 3.1535,
"step": 3870
},
{
"epoch": 0.84,
"learning_rate": 4.7632719178675015e-05,
"loss": 3.1306,
"step": 3880
},
{
"epoch": 0.84,
"learning_rate": 4.7617847238455555e-05,
"loss": 3.1388,
"step": 3890
},
{
"epoch": 0.84,
"learning_rate": 4.7602931066081454e-05,
"loss": 3.1636,
"step": 3900
},
{
"epoch": 0.84,
"learning_rate": 4.758797069072324e-05,
"loss": 3.1396,
"step": 3910
},
{
"epoch": 0.85,
"learning_rate": 4.757446858352852e-05,
"loss": 3.1451,
"step": 3920
},
{
"epoch": 0.85,
"learning_rate": 4.755942430317483e-05,
"loss": 3.1078,
"step": 3930
},
{
"epoch": 0.85,
"learning_rate": 4.754433590492019e-05,
"loss": 3.1252,
"step": 3940
},
{
"epoch": 0.85,
"learning_rate": 4.752920341827192e-05,
"loss": 3.122,
"step": 3950
},
{
"epoch": 0.85,
"learning_rate": 4.751402687282358e-05,
"loss": 3.1281,
"step": 3960
},
{
"epoch": 0.86,
"learning_rate": 4.7498806298254886e-05,
"loss": 3.1505,
"step": 3970
},
{
"epoch": 0.86,
"learning_rate": 4.748354172433166e-05,
"loss": 3.1262,
"step": 3980
},
{
"epoch": 0.86,
"learning_rate": 4.746823318090578e-05,
"loss": 3.1453,
"step": 3990
},
{
"epoch": 0.86,
"learning_rate": 4.745288069791508e-05,
"loss": 3.1428,
"step": 4000
},
{
"epoch": 0.87,
"learning_rate": 4.743748430538336e-05,
"loss": 3.1775,
"step": 4010
},
{
"epoch": 0.87,
"learning_rate": 4.742204403342029e-05,
"loss": 3.1493,
"step": 4020
},
{
"epoch": 0.87,
"learning_rate": 4.740655991222131e-05,
"loss": 3.1046,
"step": 4030
},
{
"epoch": 0.87,
"learning_rate": 4.73925867370712e-05,
"loss": 3.1697,
"step": 4040
},
{
"epoch": 0.87,
"learning_rate": 4.737857809070107e-05,
"loss": 3.1546,
"step": 4050
},
{
"epoch": 0.88,
"learning_rate": 4.736297135301117e-05,
"loss": 3.1643,
"step": 4060
},
{
"epoch": 0.88,
"learning_rate": 4.734732088160974e-05,
"loss": 3.1166,
"step": 4070
},
{
"epoch": 0.88,
"learning_rate": 4.733162670710329e-05,
"loss": 3.1506,
"step": 4080
},
{
"epoch": 0.88,
"learning_rate": 4.731588886018383e-05,
"loss": 3.1268,
"step": 4090
},
{
"epoch": 0.89,
"learning_rate": 4.730010737162879e-05,
"loss": 3.0951,
"step": 4100
},
{
"epoch": 0.89,
"learning_rate": 4.7284282272300914e-05,
"loss": 3.1179,
"step": 4110
},
{
"epoch": 0.89,
"learning_rate": 4.726841359314828e-05,
"loss": 3.0883,
"step": 4120
},
{
"epoch": 0.89,
"learning_rate": 4.725250136520413e-05,
"loss": 3.1593,
"step": 4130
},
{
"epoch": 0.89,
"learning_rate": 4.723654561958691e-05,
"loss": 3.0923,
"step": 4140
},
{
"epoch": 0.9,
"learning_rate": 4.722374971133446e-05,
"loss": 3.1415,
"step": 4150
},
{
"epoch": 0.9,
"learning_rate": 4.720771571259582e-05,
"loss": 3.1356,
"step": 4160
},
{
"epoch": 0.9,
"learning_rate": 4.719324798010965e-05,
"loss": 3.1327,
"step": 4170
},
{
"epoch": 0.9,
"learning_rate": 4.7177131491082236e-05,
"loss": 3.133,
"step": 4180
},
{
"epoch": 0.9,
"learning_rate": 4.71609716317774e-05,
"loss": 3.1468,
"step": 4190
},
{
"epoch": 0.91,
"learning_rate": 4.7144768433797836e-05,
"loss": 3.1678,
"step": 4200
},
{
"epoch": 0.91,
"learning_rate": 4.712852192883104e-05,
"loss": 3.1353,
"step": 4210
},
{
"epoch": 0.91,
"learning_rate": 4.711223214864916e-05,
"loss": 3.1122,
"step": 4220
},
{
"epoch": 0.91,
"learning_rate": 4.709589912510899e-05,
"loss": 3.1193,
"step": 4230
},
{
"epoch": 0.92,
"learning_rate": 4.70795228901519e-05,
"loss": 3.1715,
"step": 4240
},
{
"epoch": 0.92,
"learning_rate": 4.706310347580375e-05,
"loss": 3.1365,
"step": 4250
},
{
"epoch": 0.92,
"learning_rate": 4.704664091417485e-05,
"loss": 3.1115,
"step": 4260
},
{
"epoch": 0.92,
"learning_rate": 4.703178774439096e-05,
"loss": 3.1663,
"step": 4270
},
{
"epoch": 0.92,
"learning_rate": 4.70152432916947e-05,
"loss": 3.1371,
"step": 4280
},
{
"epoch": 0.93,
"learning_rate": 4.699865578531452e-05,
"loss": 3.0817,
"step": 4290
},
{
"epoch": 0.93,
"learning_rate": 4.698202525768948e-05,
"loss": 3.1363,
"step": 4300
},
{
"epoch": 0.93,
"learning_rate": 4.696535174134272e-05,
"loss": 3.1297,
"step": 4310
},
{
"epoch": 0.93,
"learning_rate": 4.6948635268881504e-05,
"loss": 3.1488,
"step": 4320
},
{
"epoch": 0.93,
"learning_rate": 4.6931875872997064e-05,
"loss": 3.132,
"step": 4330
},
{
"epoch": 0.94,
"learning_rate": 4.69150735864646e-05,
"loss": 3.154,
"step": 4340
},
{
"epoch": 0.94,
"learning_rate": 4.689822844214316e-05,
"loss": 3.1491,
"step": 4350
},
{
"epoch": 0.94,
"learning_rate": 4.6881340472975646e-05,
"loss": 3.0787,
"step": 4360
},
{
"epoch": 0.94,
"learning_rate": 4.686779928594354e-05,
"loss": 3.1253,
"step": 4370
},
{
"epoch": 0.95,
"learning_rate": 4.685083431533641e-05,
"loss": 3.1187,
"step": 4380
},
{
"epoch": 0.95,
"learning_rate": 4.68338266125686e-05,
"loss": 3.1055,
"step": 4390
},
{
"epoch": 0.95,
"learning_rate": 4.6816776210900894e-05,
"loss": 3.0825,
"step": 4400
},
{
"epoch": 0.95,
"learning_rate": 4.679968314367758e-05,
"loss": 3.1393,
"step": 4410
},
{
"epoch": 0.95,
"learning_rate": 4.678254744432638e-05,
"loss": 3.0972,
"step": 4420
},
{
"epoch": 0.96,
"learning_rate": 4.676536914635841e-05,
"loss": 3.1343,
"step": 4430
},
{
"epoch": 0.96,
"learning_rate": 4.6748148283368054e-05,
"loss": 3.0902,
"step": 4440
},
{
"epoch": 0.96,
"learning_rate": 4.673088488903297e-05,
"loss": 3.1089,
"step": 4450
},
{
"epoch": 0.96,
"learning_rate": 4.671357899711397e-05,
"loss": 3.0771,
"step": 4460
},
{
"epoch": 0.97,
"learning_rate": 4.6697967386922925e-05,
"loss": 3.0693,
"step": 4470
},
{
"epoch": 0.97,
"learning_rate": 4.6680580842903174e-05,
"loss": 3.1108,
"step": 4480
},
{
"epoch": 0.97,
"learning_rate": 4.666315189967564e-05,
"loss": 3.1686,
"step": 4490
},
{
"epoch": 0.97,
"learning_rate": 4.6645680591324913e-05,
"loss": 3.1616,
"step": 4500
},
{
"epoch": 0.97,
"learning_rate": 4.66281669520184e-05,
"loss": 3.1161,
"step": 4510
},
{
"epoch": 0.98,
"learning_rate": 4.6610611016006335e-05,
"loss": 3.1232,
"step": 4520
},
{
"epoch": 0.98,
"learning_rate": 4.659301281762162e-05,
"loss": 3.1393,
"step": 4530
},
{
"epoch": 0.98,
"learning_rate": 4.657537239127985e-05,
"loss": 3.1409,
"step": 4540
},
{
"epoch": 0.98,
"learning_rate": 4.655768977147916e-05,
"loss": 3.1348,
"step": 4550
},
{
"epoch": 0.98,
"learning_rate": 4.653996499280025e-05,
"loss": 3.1567,
"step": 4560
},
{
"epoch": 0.99,
"learning_rate": 4.652219808990622e-05,
"loss": 3.1217,
"step": 4570
},
{
"epoch": 0.99,
"learning_rate": 4.650438909754258e-05,
"loss": 3.1763,
"step": 4580
},
{
"epoch": 0.99,
"learning_rate": 4.648653805053713e-05,
"loss": 3.1393,
"step": 4590
},
{
"epoch": 0.99,
"learning_rate": 4.646864498379993e-05,
"loss": 3.1049,
"step": 4600
},
{
"epoch": 1.0,
"learning_rate": 4.645070993232321e-05,
"loss": 3.1234,
"step": 4610
},
{
"epoch": 1.0,
"learning_rate": 4.6434532518029126e-05,
"loss": 3.093,
"step": 4620
},
{
"epoch": 1.0,
"learning_rate": 4.6416517792244917e-05,
"loss": 3.1365,
"step": 4630
},
{
"epoch": 1.0,
"eval_loss": 3.1079609394073486,
"eval_runtime": 272.7311,
"eval_samples_per_second": 543.466,
"eval_steps_per_second": 16.984,
"step": 4632
},
{
"epoch": 1.0,
"learning_rate": 4.639846118366273e-05,
"loss": 3.0849,
"step": 4640
},
{
"epoch": 1.0,
"learning_rate": 4.6380362727594603e-05,
"loss": 3.0776,
"step": 4650
},
{
"epoch": 1.01,
"learning_rate": 4.6362222459434447e-05,
"loss": 3.1569,
"step": 4660
},
{
"epoch": 1.01,
"learning_rate": 4.634404041465791e-05,
"loss": 3.1195,
"step": 4670
},
{
"epoch": 1.01,
"learning_rate": 4.632581662882237e-05,
"loss": 3.0673,
"step": 4680
},
{
"epoch": 1.01,
"learning_rate": 4.6307551137566814e-05,
"loss": 3.088,
"step": 4690
},
{
"epoch": 1.01,
"learning_rate": 4.629107656682405e-05,
"loss": 3.1197,
"step": 4700
},
{
"epoch": 1.02,
"learning_rate": 4.6272731933747945e-05,
"loss": 3.1405,
"step": 4710
},
{
"epoch": 1.02,
"learning_rate": 4.625434569906587e-05,
"loss": 3.0643,
"step": 4720
},
{
"epoch": 1.02,
"learning_rate": 4.6235917898734526e-05,
"loss": 3.1227,
"step": 4730
},
{
"epoch": 1.02,
"learning_rate": 4.6217448568791874e-05,
"loss": 3.1433,
"step": 4740
},
{
"epoch": 1.03,
"learning_rate": 4.619893774535711e-05,
"loss": 3.0972,
"step": 4750
},
{
"epoch": 1.03,
"learning_rate": 4.6180385464630546e-05,
"loss": 3.0861,
"step": 4760
},
{
"epoch": 1.03,
"learning_rate": 4.616179176289361e-05,
"loss": 3.078,
"step": 4770
},
{
"epoch": 1.03,
"learning_rate": 4.614315667650872e-05,
"loss": 3.1223,
"step": 4780
},
{
"epoch": 1.03,
"learning_rate": 4.61244802419192e-05,
"loss": 3.1382,
"step": 4790
},
{
"epoch": 1.04,
"learning_rate": 4.610576249564926e-05,
"loss": 3.1189,
"step": 4800
},
{
"epoch": 1.04,
"learning_rate": 4.608700347430392e-05,
"loss": 3.1274,
"step": 4810
},
{
"epoch": 1.04,
"learning_rate": 4.606820321456887e-05,
"loss": 3.1188,
"step": 4820
},
{
"epoch": 1.04,
"learning_rate": 4.6049361753210496e-05,
"loss": 3.1322,
"step": 4830
},
{
"epoch": 1.04,
"learning_rate": 4.603047912707572e-05,
"loss": 3.1173,
"step": 4840
},
{
"epoch": 1.05,
"learning_rate": 4.6011555373091994e-05,
"loss": 3.1036,
"step": 4850
},
{
"epoch": 1.05,
"learning_rate": 4.5992590528267185e-05,
"loss": 3.1054,
"step": 4860
},
{
"epoch": 1.05,
"learning_rate": 4.5973584629689524e-05,
"loss": 3.0934,
"step": 4870
},
{
"epoch": 1.05,
"learning_rate": 4.5954537714527534e-05,
"loss": 3.1158,
"step": 4880
},
{
"epoch": 1.06,
"learning_rate": 4.593544982002994e-05,
"loss": 3.12,
"step": 4890
},
{
"epoch": 1.06,
"learning_rate": 4.591632098352562e-05,
"loss": 3.1346,
"step": 4900
},
{
"epoch": 1.06,
"learning_rate": 4.5897151242423504e-05,
"loss": 3.1254,
"step": 4910
},
{
"epoch": 1.06,
"learning_rate": 4.5879863532983434e-05,
"loss": 3.1096,
"step": 4920
},
{
"epoch": 1.06,
"learning_rate": 4.586254274856896e-05,
"loss": 3.0445,
"step": 4930
},
{
"epoch": 1.07,
"learning_rate": 4.5843258674289204e-05,
"loss": 3.0827,
"step": 4940
},
{
"epoch": 1.07,
"learning_rate": 4.582393383829454e-05,
"loss": 3.0624,
"step": 4950
},
{
"epoch": 1.07,
"learning_rate": 4.580456827837723e-05,
"loss": 3.0551,
"step": 4960
},
{
"epoch": 1.07,
"learning_rate": 4.5785162032409127e-05,
"loss": 3.1533,
"step": 4970
},
{
"epoch": 1.08,
"learning_rate": 4.57657151383417e-05,
"loss": 3.0761,
"step": 4980
},
{
"epoch": 1.08,
"learning_rate": 4.5746227634205877e-05,
"loss": 3.0931,
"step": 4990
},
{
"epoch": 1.08,
"learning_rate": 4.572669955811203e-05,
"loss": 3.1204,
"step": 5000
},
{
"epoch": 1.08,
"learning_rate": 4.570713094824985e-05,
"loss": 3.161,
"step": 5010
},
{
"epoch": 1.08,
"learning_rate": 4.568752184288832e-05,
"loss": 3.1025,
"step": 5020
},
{
"epoch": 1.09,
"learning_rate": 4.567180542760687e-05,
"loss": 3.0956,
"step": 5030
},
{
"epoch": 1.09,
"learning_rate": 4.565212352703705e-05,
"loss": 3.061,
"step": 5040
},
{
"epoch": 1.09,
"learning_rate": 4.563240123854208e-05,
"loss": 3.0668,
"step": 5050
},
{
"epoch": 1.09,
"learning_rate": 4.561263860069146e-05,
"loss": 3.1331,
"step": 5060
},
{
"epoch": 1.09,
"learning_rate": 4.559283565213361e-05,
"loss": 3.1136,
"step": 5070
},
{
"epoch": 1.1,
"learning_rate": 4.5574978564783324e-05,
"loss": 3.1342,
"step": 5080
},
{
"epoch": 1.1,
"learning_rate": 4.5555099132640424e-05,
"loss": 3.0739,
"step": 5090
},
{
"epoch": 1.1,
"learning_rate": 4.553517950231625e-05,
"loss": 3.0902,
"step": 5100
},
{
"epoch": 1.1,
"learning_rate": 4.5515219712766224e-05,
"loss": 3.1134,
"step": 5110
},
{
"epoch": 1.11,
"learning_rate": 4.5495219803024344e-05,
"loss": 3.0601,
"step": 5120
},
{
"epoch": 1.11,
"learning_rate": 4.547517981220303e-05,
"loss": 3.077,
"step": 5130
},
{
"epoch": 1.11,
"learning_rate": 4.54550997794931e-05,
"loss": 3.1203,
"step": 5140
},
{
"epoch": 1.11,
"learning_rate": 4.54349797441637e-05,
"loss": 3.0777,
"step": 5150
},
{
"epoch": 1.11,
"learning_rate": 4.5414819745562164e-05,
"loss": 3.0827,
"step": 5160
},
{
"epoch": 1.12,
"learning_rate": 4.539461982311402e-05,
"loss": 3.0875,
"step": 5170
},
{
"epoch": 1.12,
"learning_rate": 4.5374380016322836e-05,
"loss": 3.1124,
"step": 5180
},
{
"epoch": 1.12,
"learning_rate": 4.535410036477022e-05,
"loss": 3.0654,
"step": 5190
},
{
"epoch": 1.12,
"learning_rate": 4.53358146438788e-05,
"loss": 3.0443,
"step": 5200
},
{
"epoch": 1.12,
"learning_rate": 4.5315459396605727e-05,
"loss": 3.0834,
"step": 5210
},
{
"epoch": 1.13,
"learning_rate": 4.5295064419798176e-05,
"loss": 3.1127,
"step": 5220
},
{
"epoch": 1.13,
"learning_rate": 4.527462975334119e-05,
"loss": 3.1464,
"step": 5230
},
{
"epoch": 1.13,
"learning_rate": 4.525415543719744e-05,
"loss": 3.0712,
"step": 5240
},
{
"epoch": 1.13,
"learning_rate": 4.5233641511407105e-05,
"loss": 3.0548,
"step": 5250
},
{
"epoch": 1.14,
"learning_rate": 4.521308801608786e-05,
"loss": 3.0707,
"step": 5260
},
{
"epoch": 1.14,
"learning_rate": 4.519249499143475e-05,
"loss": 3.1012,
"step": 5270
},
{
"epoch": 1.14,
"learning_rate": 4.517186247772012e-05,
"loss": 3.0869,
"step": 5280
},
{
"epoch": 1.14,
"learning_rate": 4.515119051529355e-05,
"loss": 3.1065,
"step": 5290
},
{
"epoch": 1.14,
"learning_rate": 4.513047914458176e-05,
"loss": 3.0617,
"step": 5300
},
{
"epoch": 1.15,
"learning_rate": 4.5109728406088536e-05,
"loss": 3.0979,
"step": 5310
},
{
"epoch": 1.15,
"learning_rate": 4.5091019115529945e-05,
"loss": 3.1189,
"step": 5320
},
{
"epoch": 1.15,
"learning_rate": 4.507227799957448e-05,
"loss": 3.0341,
"step": 5330
},
{
"epoch": 1.15,
"learning_rate": 4.5051417247428455e-05,
"loss": 3.0715,
"step": 5340
},
{
"epoch": 1.16,
"learning_rate": 4.5032609042076203e-05,
"loss": 3.0595,
"step": 5350
},
{
"epoch": 1.16,
"learning_rate": 4.50116738198595e-05,
"loss": 3.072,
"step": 5360
},
{
"epoch": 1.16,
"learning_rate": 4.4990699462200705e-05,
"loss": 3.1288,
"step": 5370
},
{
"epoch": 1.16,
"learning_rate": 4.496968601011791e-05,
"loss": 3.1235,
"step": 5380
},
{
"epoch": 1.16,
"learning_rate": 4.4948633504705654e-05,
"loss": 3.1167,
"step": 5390
},
{
"epoch": 1.17,
"learning_rate": 4.492754198713488e-05,
"loss": 3.1039,
"step": 5400
},
{
"epoch": 1.17,
"learning_rate": 4.4906411498652786e-05,
"loss": 3.1193,
"step": 5410
},
{
"epoch": 1.17,
"learning_rate": 4.488524208058282e-05,
"loss": 3.0753,
"step": 5420
},
{
"epoch": 1.17,
"learning_rate": 4.486403377432453e-05,
"loss": 3.0858,
"step": 5430
},
{
"epoch": 1.17,
"learning_rate": 4.4842786621353546e-05,
"loss": 3.067,
"step": 5440
},
{
"epoch": 1.18,
"learning_rate": 4.4821500663221435e-05,
"loss": 3.1419,
"step": 5450
},
{
"epoch": 1.18,
"learning_rate": 4.4800175941555675e-05,
"loss": 3.0853,
"step": 5460
},
{
"epoch": 1.18,
"learning_rate": 4.477881249805954e-05,
"loss": 3.0262,
"step": 5470
},
{
"epoch": 1.18,
"learning_rate": 4.475741037451204e-05,
"loss": 3.097,
"step": 5480
},
{
"epoch": 1.19,
"learning_rate": 4.473596961276783e-05,
"loss": 3.0484,
"step": 5490
},
{
"epoch": 1.19,
"learning_rate": 4.4714490254757105e-05,
"loss": 3.0538,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 4.469297234248555e-05,
"loss": 3.0765,
"step": 5510
},
{
"epoch": 1.19,
"learning_rate": 4.467357329232663e-05,
"loss": 3.1038,
"step": 5520
},
{
"epoch": 1.19,
"learning_rate": 4.465198224295518e-05,
"loss": 3.0382,
"step": 5530
},
{
"epoch": 1.2,
"learning_rate": 4.4630352761565494e-05,
"loss": 3.111,
"step": 5540
},
{
"epoch": 1.2,
"learning_rate": 4.4608684890456845e-05,
"loss": 3.0495,
"step": 5550
},
{
"epoch": 1.2,
"learning_rate": 4.458697867200359e-05,
"loss": 3.0987,
"step": 5560
},
{
"epoch": 1.2,
"learning_rate": 4.456523414865507e-05,
"loss": 3.0522,
"step": 5570
},
{
"epoch": 1.2,
"learning_rate": 4.454345136293554e-05,
"loss": 3.0691,
"step": 5580
},
{
"epoch": 1.21,
"learning_rate": 4.452381417666736e-05,
"loss": 3.0845,
"step": 5590
},
{
"epoch": 1.21,
"learning_rate": 4.4501958809865176e-05,
"loss": 3.059,
"step": 5600
},
{
"epoch": 1.21,
"learning_rate": 4.448006530443517e-05,
"loss": 3.0864,
"step": 5610
},
{
"epoch": 1.21,
"learning_rate": 4.445813370319296e-05,
"loss": 3.0731,
"step": 5620
},
{
"epoch": 1.22,
"learning_rate": 4.443616404902865e-05,
"loss": 3.0632,
"step": 5630
},
{
"epoch": 1.22,
"learning_rate": 4.441415638490678e-05,
"loss": 3.0577,
"step": 5640
},
{
"epoch": 1.22,
"learning_rate": 4.43921107538662e-05,
"loss": 3.0946,
"step": 5650
},
{
"epoch": 1.22,
"learning_rate": 4.4370027199020024e-05,
"loss": 3.062,
"step": 5660
},
{
"epoch": 1.22,
"learning_rate": 4.434790576355553e-05,
"loss": 3.0902,
"step": 5670
},
{
"epoch": 1.23,
"learning_rate": 4.4325746490734075e-05,
"loss": 3.0534,
"step": 5680
},
{
"epoch": 1.23,
"learning_rate": 4.430354942389101e-05,
"loss": 3.0797,
"step": 5690
},
{
"epoch": 1.23,
"learning_rate": 4.42813146064356e-05,
"loss": 3.099,
"step": 5700
},
{
"epoch": 1.23,
"learning_rate": 4.4259042081850934e-05,
"loss": 3.1312,
"step": 5710
},
{
"epoch": 1.23,
"learning_rate": 4.423673189369384e-05,
"loss": 3.0581,
"step": 5720
},
{
"epoch": 1.24,
"learning_rate": 4.421662055805711e-05,
"loss": 3.0436,
"step": 5730
},
{
"epoch": 1.24,
"learning_rate": 4.419423892937521e-05,
"loss": 3.1371,
"step": 5740
},
{
"epoch": 1.24,
"learning_rate": 4.417181976385192e-05,
"loss": 3.1115,
"step": 5750
},
{
"epoch": 1.24,
"learning_rate": 4.414936310533083e-05,
"loss": 3.0892,
"step": 5760
},
{
"epoch": 1.25,
"learning_rate": 4.412686899772889e-05,
"loss": 3.0711,
"step": 5770
},
{
"epoch": 1.25,
"learning_rate": 4.410433748503625e-05,
"loss": 3.0844,
"step": 5780
},
{
"epoch": 1.25,
"learning_rate": 4.408176861131623e-05,
"loss": 3.1099,
"step": 5790
},
{
"epoch": 1.25,
"learning_rate": 4.4059162420705215e-05,
"loss": 3.0876,
"step": 5800
},
{
"epoch": 1.25,
"learning_rate": 4.403878497975103e-05,
"loss": 3.0698,
"step": 5810
},
{
"epoch": 1.26,
"learning_rate": 4.401610800890414e-05,
"loss": 3.0657,
"step": 5820
},
{
"epoch": 1.26,
"learning_rate": 4.3993393849574147e-05,
"loss": 3.0794,
"step": 5830
},
{
"epoch": 1.26,
"learning_rate": 4.397064254618155e-05,
"loss": 3.0834,
"step": 5840
},
{
"epoch": 1.26,
"learning_rate": 4.394785414321948e-05,
"loss": 3.0714,
"step": 5850
},
{
"epoch": 1.27,
"learning_rate": 4.392502868525366e-05,
"loss": 3.1123,
"step": 5860
},
{
"epoch": 1.27,
"learning_rate": 4.390216621692224e-05,
"loss": 3.0709,
"step": 5870
},
{
"epoch": 1.27,
"learning_rate": 4.3879266782935754e-05,
"loss": 3.0947,
"step": 5880
},
{
"epoch": 1.27,
"learning_rate": 4.3856330428077046e-05,
"loss": 3.0463,
"step": 5890
},
{
"epoch": 1.27,
"learning_rate": 4.3833357197201164e-05,
"loss": 3.0472,
"step": 5900
},
{
"epoch": 1.28,
"learning_rate": 4.3810347135235246e-05,
"loss": 3.0543,
"step": 5910
},
{
"epoch": 1.28,
"learning_rate": 4.378730028717848e-05,
"loss": 3.0449,
"step": 5920
},
{
"epoch": 1.28,
"learning_rate": 4.376421669810199e-05,
"loss": 3.0564,
"step": 5930
},
{
"epoch": 1.28,
"learning_rate": 4.3743410091670465e-05,
"loss": 3.0927,
"step": 5940
},
{
"epoch": 1.28,
"learning_rate": 4.3720256819084704e-05,
"loss": 3.04,
"step": 5950
},
{
"epoch": 1.29,
"learning_rate": 4.3697066936591475e-05,
"loss": 3.0975,
"step": 5960
},
{
"epoch": 1.29,
"learning_rate": 4.367616477835767e-05,
"loss": 3.0934,
"step": 5970
},
{
"epoch": 1.29,
"learning_rate": 4.365290546204094e-05,
"loss": 3.0913,
"step": 5980
},
{
"epoch": 1.29,
"learning_rate": 4.36296096675311e-05,
"loss": 3.0649,
"step": 5990
},
{
"epoch": 1.3,
"learning_rate": 4.360627744038616e-05,
"loss": 3.0304,
"step": 6000
},
{
"epoch": 1.3,
"learning_rate": 4.3582908826235294e-05,
"loss": 3.1249,
"step": 6010
},
{
"epoch": 1.3,
"learning_rate": 4.3559503870778916e-05,
"loss": 3.0373,
"step": 6020
},
{
"epoch": 1.3,
"learning_rate": 4.3536062619788455e-05,
"loss": 3.0379,
"step": 6030
},
{
"epoch": 1.3,
"learning_rate": 4.3512585119106344e-05,
"loss": 3.0313,
"step": 6040
},
{
"epoch": 1.31,
"learning_rate": 4.34890714146459e-05,
"loss": 3.0485,
"step": 6050
},
{
"epoch": 1.31,
"learning_rate": 4.346552155239124e-05,
"loss": 3.1057,
"step": 6060
},
{
"epoch": 1.31,
"learning_rate": 4.344193557839721e-05,
"loss": 3.0484,
"step": 6070
},
{
"epoch": 1.31,
"learning_rate": 4.342067736438663e-05,
"loss": 3.1011,
"step": 6080
},
{
"epoch": 1.31,
"learning_rate": 4.3397022905221697e-05,
"loss": 3.0748,
"step": 6090
},
{
"epoch": 1.32,
"learning_rate": 4.337333246827544e-05,
"loss": 3.0687,
"step": 6100
},
{
"epoch": 1.32,
"learning_rate": 4.334960609987759e-05,
"loss": 3.0476,
"step": 6110
},
{
"epoch": 1.32,
"learning_rate": 4.3325843846428175e-05,
"loss": 3.0799,
"step": 6120
},
{
"epoch": 1.32,
"learning_rate": 4.330204575439737e-05,
"loss": 3.0682,
"step": 6130
},
{
"epoch": 1.33,
"learning_rate": 4.327821187032547e-05,
"loss": 3.0854,
"step": 6140
},
{
"epoch": 1.33,
"learning_rate": 4.325434224082274e-05,
"loss": 3.0755,
"step": 6150
},
{
"epoch": 1.33,
"learning_rate": 4.3230436912569354e-05,
"loss": 3.0786,
"step": 6160
},
{
"epoch": 1.33,
"learning_rate": 4.3206495932315314e-05,
"loss": 3.0779,
"step": 6170
},
{
"epoch": 1.33,
"learning_rate": 4.318251934688033e-05,
"loss": 3.0299,
"step": 6180
},
{
"epoch": 1.34,
"learning_rate": 4.315850720315374e-05,
"loss": 3.0795,
"step": 6190
},
{
"epoch": 1.34,
"learning_rate": 4.3139271917756556e-05,
"loss": 3.0861,
"step": 6200
},
{
"epoch": 1.34,
"learning_rate": 4.3115195887488145e-05,
"loss": 3.0561,
"step": 6210
},
{
"epoch": 1.34,
"learning_rate": 4.309108443058796e-05,
"loss": 3.0703,
"step": 6220
},
{
"epoch": 1.34,
"learning_rate": 4.306693759420909e-05,
"loss": 3.0625,
"step": 6230
},
{
"epoch": 1.35,
"learning_rate": 4.304517523104164e-05,
"loss": 3.054,
"step": 6240
},
{
"epoch": 1.35,
"learning_rate": 4.302096130380781e-05,
"loss": 3.0711,
"step": 6250
},
{
"epoch": 1.35,
"learning_rate": 4.299671213423022e-05,
"loss": 3.0152,
"step": 6260
},
{
"epoch": 1.35,
"learning_rate": 4.297242776973133e-05,
"loss": 3.0893,
"step": 6270
},
{
"epoch": 1.36,
"learning_rate": 4.294810825780234e-05,
"loss": 3.0616,
"step": 6280
},
{
"epoch": 1.36,
"learning_rate": 4.2923753646003266e-05,
"loss": 2.9913,
"step": 6290
},
{
"epoch": 1.36,
"learning_rate": 4.289936398196272e-05,
"loss": 3.0254,
"step": 6300
},
{
"epoch": 1.36,
"learning_rate": 4.287493931337787e-05,
"loss": 3.0614,
"step": 6310
},
{
"epoch": 1.36,
"learning_rate": 4.285047968801436e-05,
"loss": 3.0724,
"step": 6320
},
{
"epoch": 1.37,
"learning_rate": 4.282598515370617e-05,
"loss": 3.0359,
"step": 6330
},
{
"epoch": 1.37,
"learning_rate": 4.280145575835556e-05,
"loss": 3.0914,
"step": 6340
},
{
"epoch": 1.37,
"learning_rate": 4.277689154993297e-05,
"loss": 3.0023,
"step": 6350
},
{
"epoch": 1.37,
"learning_rate": 4.275229257647693e-05,
"loss": 3.0514,
"step": 6360
},
{
"epoch": 1.38,
"learning_rate": 4.272765888609393e-05,
"loss": 3.0783,
"step": 6370
},
{
"epoch": 1.38,
"learning_rate": 4.2702990526958374e-05,
"loss": 3.0651,
"step": 6380
},
{
"epoch": 1.38,
"learning_rate": 4.2678287547312465e-05,
"loss": 3.0196,
"step": 6390
},
{
"epoch": 1.38,
"learning_rate": 4.26535499954661e-05,
"loss": 3.0609,
"step": 6400
},
{
"epoch": 1.38,
"learning_rate": 4.2628777919796795e-05,
"loss": 3.0467,
"step": 6410
},
{
"epoch": 1.39,
"learning_rate": 4.2606453573864236e-05,
"loss": 3.0282,
"step": 6420
},
{
"epoch": 1.39,
"learning_rate": 4.258161603645313e-05,
"loss": 3.0124,
"step": 6430
},
{
"epoch": 1.39,
"learning_rate": 4.255674411589534e-05,
"loss": 3.1188,
"step": 6440
},
{
"epoch": 1.39,
"learning_rate": 4.253433003000272e-05,
"loss": 3.037,
"step": 6450
},
{
"epoch": 1.39,
"learning_rate": 4.250939291552573e-05,
"loss": 3.0842,
"step": 6460
},
{
"epoch": 1.4,
"learning_rate": 4.248442155914378e-05,
"loss": 3.0795,
"step": 6470
},
{
"epoch": 1.4,
"learning_rate": 4.2459416009691655e-05,
"loss": 3.0428,
"step": 6480
},
{
"epoch": 1.4,
"learning_rate": 4.243437631607095e-05,
"loss": 3.086,
"step": 6490
},
{
"epoch": 1.4,
"learning_rate": 4.2409302527250064e-05,
"loss": 3.0521,
"step": 6500
},
{
"epoch": 1.41,
"learning_rate": 4.238419469226408e-05,
"loss": 3.0548,
"step": 6510
},
{
"epoch": 1.41,
"learning_rate": 4.2359052860214624e-05,
"loss": 3.0496,
"step": 6520
},
{
"epoch": 1.41,
"learning_rate": 4.233387708026984e-05,
"loss": 3.0647,
"step": 6530
},
{
"epoch": 1.41,
"learning_rate": 4.230866740166427e-05,
"loss": 3.058,
"step": 6540
},
{
"epoch": 1.41,
"learning_rate": 4.2283423873698715e-05,
"loss": 3.0689,
"step": 6550
},
{
"epoch": 1.42,
"learning_rate": 4.22581465457402e-05,
"loss": 3.0487,
"step": 6560
},
{
"epoch": 1.42,
"learning_rate": 4.223283546722184e-05,
"loss": 3.101,
"step": 6570
},
{
"epoch": 1.42,
"learning_rate": 4.220749068764275e-05,
"loss": 3.0478,
"step": 6580
},
{
"epoch": 1.42,
"learning_rate": 4.2182112256567964e-05,
"loss": 3.0707,
"step": 6590
},
{
"epoch": 1.42,
"learning_rate": 4.215670022362832e-05,
"loss": 3.075,
"step": 6600
},
{
"epoch": 1.43,
"learning_rate": 4.2131254638520366e-05,
"loss": 3.0593,
"step": 6610
},
{
"epoch": 1.43,
"learning_rate": 4.2105775551006266e-05,
"loss": 3.0803,
"step": 6620
},
{
"epoch": 1.43,
"learning_rate": 4.2080263010913704e-05,
"loss": 3.0546,
"step": 6630
},
{
"epoch": 1.43,
"learning_rate": 4.205471706813577e-05,
"loss": 3.0473,
"step": 6640
},
{
"epoch": 1.44,
"learning_rate": 4.2031697201628954e-05,
"loss": 3.1144,
"step": 6650
},
{
"epoch": 1.44,
"learning_rate": 4.2008650355877355e-05,
"loss": 3.058,
"step": 6660
},
{
"epoch": 1.44,
"learning_rate": 4.198301115156741e-05,
"loss": 3.0695,
"step": 6670
},
{
"epoch": 1.44,
"learning_rate": 4.1957338734760807e-05,
"loss": 3.0094,
"step": 6680
},
{
"epoch": 1.44,
"learning_rate": 4.193163315566334e-05,
"loss": 3.0266,
"step": 6690
},
{
"epoch": 1.45,
"learning_rate": 4.19058944645456e-05,
"loss": 3.0815,
"step": 6700
},
{
"epoch": 1.45,
"learning_rate": 4.188012271174299e-05,
"loss": 3.0812,
"step": 6710
},
{
"epoch": 1.45,
"learning_rate": 4.18543179476555e-05,
"loss": 3.0678,
"step": 6720
},
{
"epoch": 1.45,
"learning_rate": 4.1828480222747744e-05,
"loss": 3.0838,
"step": 6730
},
{
"epoch": 1.46,
"learning_rate": 4.180260958754876e-05,
"loss": 3.0508,
"step": 6740
},
{
"epoch": 1.46,
"learning_rate": 4.1779297919384794e-05,
"loss": 3.0554,
"step": 6750
},
{
"epoch": 1.46,
"learning_rate": 4.1753364894070436e-05,
"loss": 3.0131,
"step": 6760
},
{
"epoch": 1.46,
"learning_rate": 4.172739910536267e-05,
"loss": 3.0486,
"step": 6770
},
{
"epoch": 1.46,
"learning_rate": 4.1701400604041014e-05,
"loss": 3.1095,
"step": 6780
},
{
"epoch": 1.47,
"learning_rate": 4.1675369440948926e-05,
"loss": 3.0621,
"step": 6790
},
{
"epoch": 1.47,
"learning_rate": 4.1649305666993744e-05,
"loss": 3.085,
"step": 6800
},
{
"epoch": 1.47,
"learning_rate": 4.162320933314658e-05,
"loss": 3.047,
"step": 6810
},
{
"epoch": 1.47,
"learning_rate": 4.159708049044223e-05,
"loss": 3.0417,
"step": 6820
},
{
"epoch": 1.47,
"learning_rate": 4.1570919189979045e-05,
"loss": 3.0374,
"step": 6830
},
{
"epoch": 1.48,
"learning_rate": 4.1544725482918885e-05,
"loss": 3.0419,
"step": 6840
},
{
"epoch": 1.48,
"learning_rate": 4.151849942048695e-05,
"loss": 3.0809,
"step": 6850
},
{
"epoch": 1.48,
"learning_rate": 4.1492241053971734e-05,
"loss": 3.0291,
"step": 6860
},
{
"epoch": 1.48,
"learning_rate": 4.146595043472492e-05,
"loss": 3.0631,
"step": 6870
},
{
"epoch": 1.49,
"learning_rate": 4.143962761416121e-05,
"loss": 3.0517,
"step": 6880
},
{
"epoch": 1.49,
"learning_rate": 4.141327264375836e-05,
"loss": 3.0461,
"step": 6890
},
{
"epoch": 1.49,
"learning_rate": 4.138688557505693e-05,
"loss": 3.0429,
"step": 6900
},
{
"epoch": 1.49,
"learning_rate": 4.136046645966029e-05,
"loss": 3.0669,
"step": 6910
},
{
"epoch": 1.49,
"learning_rate": 4.1334015349234464e-05,
"loss": 3.0467,
"step": 6920
},
{
"epoch": 1.5,
"learning_rate": 4.130753229550807e-05,
"loss": 3.0538,
"step": 6930
},
{
"epoch": 1.5,
"learning_rate": 4.128101735027216e-05,
"loss": 3.0383,
"step": 6940
},
{
"epoch": 1.5,
"learning_rate": 4.1257126675174786e-05,
"loss": 3.0722,
"step": 6950
},
{
"epoch": 1.5,
"learning_rate": 4.1230551278978504e-05,
"loss": 3.0487,
"step": 6960
},
{
"epoch": 1.5,
"learning_rate": 4.120660628239594e-05,
"loss": 3.0318,
"step": 6970
},
{
"epoch": 1.51,
"learning_rate": 4.118263561356283e-05,
"loss": 3.0792,
"step": 6980
},
{
"epoch": 1.51,
"learning_rate": 4.115597147383657e-05,
"loss": 3.0969,
"step": 6990
},
{
"epoch": 1.51,
"learning_rate": 4.11292757389978e-05,
"loss": 3.0431,
"step": 7000
},
{
"epoch": 1.51,
"learning_rate": 4.1102548461253505e-05,
"loss": 3.0727,
"step": 7010
},
{
"epoch": 1.52,
"learning_rate": 4.1075789692872355e-05,
"loss": 3.0723,
"step": 7020
},
{
"epoch": 1.52,
"learning_rate": 4.104899948618464e-05,
"loss": 3.0863,
"step": 7030
},
{
"epoch": 1.52,
"learning_rate": 4.102217789358208e-05,
"loss": 3.0275,
"step": 7040
},
{
"epoch": 1.52,
"learning_rate": 4.099532496751782e-05,
"loss": 3.041,
"step": 7050
},
{
"epoch": 1.52,
"learning_rate": 4.0968440760506263e-05,
"loss": 3.0633,
"step": 7060
},
{
"epoch": 1.53,
"learning_rate": 4.0941525325122995e-05,
"loss": 3.0495,
"step": 7070
},
{
"epoch": 1.53,
"learning_rate": 4.091457871400465e-05,
"loss": 3.0351,
"step": 7080
},
{
"epoch": 1.53,
"learning_rate": 4.089030015229798e-05,
"loss": 3.0179,
"step": 7090
},
{
"epoch": 1.53,
"learning_rate": 4.086329445251524e-05,
"loss": 3.05,
"step": 7100
},
{
"epoch": 1.53,
"learning_rate": 4.083625772998813e-05,
"loss": 3.0678,
"step": 7110
},
{
"epoch": 1.54,
"learning_rate": 4.080919003759047e-05,
"loss": 3.033,
"step": 7120
},
{
"epoch": 1.54,
"learning_rate": 4.078480267894231e-05,
"loss": 3.0777,
"step": 7130
},
{
"epoch": 1.54,
"learning_rate": 4.0757676289674906e-05,
"loss": 3.0484,
"step": 7140
},
{
"epoch": 1.54,
"learning_rate": 4.073051908421324e-05,
"loss": 3.0277,
"step": 7150
},
{
"epoch": 1.55,
"learning_rate": 4.070333111566676e-05,
"loss": 3.0512,
"step": 7160
},
{
"epoch": 1.55,
"learning_rate": 4.06761124372051e-05,
"loss": 3.0378,
"step": 7170
},
{
"epoch": 1.55,
"learning_rate": 4.064886310205795e-05,
"loss": 3.0327,
"step": 7180
},
{
"epoch": 1.55,
"learning_rate": 4.062158316351493e-05,
"loss": 3.0072,
"step": 7190
},
{
"epoch": 1.55,
"learning_rate": 4.0594272674925524e-05,
"loss": 3.0332,
"step": 7200
},
{
"epoch": 1.56,
"learning_rate": 4.056693168969896e-05,
"loss": 3.0233,
"step": 7210
},
{
"epoch": 1.56,
"learning_rate": 4.053956026130412e-05,
"loss": 3.0654,
"step": 7220
},
{
"epoch": 1.56,
"learning_rate": 4.05121584432694e-05,
"loss": 3.0563,
"step": 7230
},
{
"epoch": 1.56,
"learning_rate": 4.048472628918262e-05,
"loss": 3.0622,
"step": 7240
},
{
"epoch": 1.57,
"learning_rate": 4.045726385269096e-05,
"loss": 3.0415,
"step": 7250
},
{
"epoch": 1.57,
"learning_rate": 4.042977118750079e-05,
"loss": 3.0955,
"step": 7260
},
{
"epoch": 1.57,
"learning_rate": 4.040224834737761e-05,
"loss": 3.0256,
"step": 7270
},
{
"epoch": 1.57,
"learning_rate": 4.0374695386145933e-05,
"loss": 3.0421,
"step": 7280
},
{
"epoch": 1.57,
"learning_rate": 4.034711235768918e-05,
"loss": 3.0532,
"step": 7290
},
{
"epoch": 1.58,
"learning_rate": 4.031949931594955e-05,
"loss": 3.063,
"step": 7300
},
{
"epoch": 1.58,
"learning_rate": 4.0291856314927954e-05,
"loss": 3.0099,
"step": 7310
},
{
"epoch": 1.58,
"learning_rate": 4.0264183408683903e-05,
"loss": 3.0301,
"step": 7320
},
{
"epoch": 1.58,
"learning_rate": 4.023925226882636e-05,
"loss": 3.0432,
"step": 7330
},
{
"epoch": 1.58,
"learning_rate": 4.0214296989065886e-05,
"loss": 3.0542,
"step": 7340
},
{
"epoch": 1.59,
"learning_rate": 4.018654063629289e-05,
"loss": 3.067,
"step": 7350
},
{
"epoch": 1.59,
"learning_rate": 4.0158754584256e-05,
"loss": 3.0914,
"step": 7360
},
{
"epoch": 1.59,
"learning_rate": 4.0130938887294466e-05,
"loss": 3.0195,
"step": 7370
},
{
"epoch": 1.59,
"learning_rate": 4.010309359980553e-05,
"loss": 3.0403,
"step": 7380
},
{
"epoch": 1.6,
"learning_rate": 4.007521877624427e-05,
"loss": 3.0245,
"step": 7390
},
{
"epoch": 1.6,
"learning_rate": 4.004731447112356e-05,
"loss": 3.0852,
"step": 7400
},
{
"epoch": 1.6,
"learning_rate": 4.00193807390139e-05,
"loss": 3.0785,
"step": 7410
},
{
"epoch": 1.6,
"learning_rate": 3.999141763454336e-05,
"loss": 3.0489,
"step": 7420
},
{
"epoch": 1.6,
"learning_rate": 3.9963425212397455e-05,
"loss": 3.0671,
"step": 7430
},
{
"epoch": 1.61,
"learning_rate": 3.9938207011097455e-05,
"loss": 3.0062,
"step": 7440
},
{
"epoch": 1.61,
"learning_rate": 3.9912965146768536e-05,
"loss": 3.0455,
"step": 7450
},
{
"epoch": 1.61,
"learning_rate": 3.9884890926546065e-05,
"loss": 3.0543,
"step": 7460
},
{
"epoch": 1.61,
"learning_rate": 3.985678759697511e-05,
"loss": 3.0498,
"step": 7470
},
{
"epoch": 1.61,
"learning_rate": 3.98286552130154e-05,
"loss": 3.0114,
"step": 7480
},
{
"epoch": 1.62,
"learning_rate": 3.98004938296835e-05,
"loss": 3.0207,
"step": 7490
},
{
"epoch": 1.62,
"learning_rate": 3.9772303502052644e-05,
"loss": 3.0383,
"step": 7500
},
{
"epoch": 1.62,
"learning_rate": 3.9744084285252725e-05,
"loss": 3.0651,
"step": 7510
},
{
"epoch": 1.62,
"learning_rate": 3.971583623447009e-05,
"loss": 3.0663,
"step": 7520
},
{
"epoch": 1.63,
"learning_rate": 3.9687559404947516e-05,
"loss": 3.0387,
"step": 7530
},
{
"epoch": 1.63,
"learning_rate": 3.9659253851984024e-05,
"loss": 3.0595,
"step": 7540
},
{
"epoch": 1.63,
"learning_rate": 3.963375434152481e-05,
"loss": 3.035,
"step": 7550
},
{
"epoch": 1.63,
"learning_rate": 3.9611068648359855e-05,
"loss": 2.9916,
"step": 7560
},
{
"epoch": 1.63,
"learning_rate": 3.9582685818763774e-05,
"loss": 3.0476,
"step": 7570
},
{
"epoch": 1.64,
"learning_rate": 3.955427447082076e-05,
"loss": 3.0544,
"step": 7580
},
{
"epoch": 1.64,
"learning_rate": 3.952583466009293e-05,
"loss": 3.0423,
"step": 7590
},
{
"epoch": 1.64,
"learning_rate": 3.949736644219805e-05,
"loss": 3.0426,
"step": 7600
},
{
"epoch": 1.64,
"learning_rate": 3.946886987280942e-05,
"loss": 3.0483,
"step": 7610
},
{
"epoch": 1.65,
"learning_rate": 3.944034500765582e-05,
"loss": 3.0314,
"step": 7620
},
{
"epoch": 1.65,
"learning_rate": 3.9411791902521354e-05,
"loss": 3.0207,
"step": 7630
},
{
"epoch": 1.65,
"learning_rate": 3.9383210613245335e-05,
"loss": 3.058,
"step": 7640
},
{
"epoch": 1.65,
"learning_rate": 3.9354601195722216e-05,
"loss": 3.025,
"step": 7650
},
{
"epoch": 1.65,
"learning_rate": 3.932596370590145e-05,
"loss": 2.9888,
"step": 7660
},
{
"epoch": 1.66,
"learning_rate": 3.929729819978738e-05,
"loss": 3.0564,
"step": 7670
},
{
"epoch": 1.66,
"learning_rate": 3.926860473343916e-05,
"loss": 3.0287,
"step": 7680
},
{
"epoch": 1.66,
"learning_rate": 3.9239883362970596e-05,
"loss": 3.0634,
"step": 7690
},
{
"epoch": 1.66,
"learning_rate": 3.92111341445501e-05,
"loss": 2.999,
"step": 7700
},
{
"epoch": 1.66,
"learning_rate": 3.918235713440051e-05,
"loss": 2.9842,
"step": 7710
},
{
"epoch": 1.67,
"learning_rate": 3.9153552388799025e-05,
"loss": 3.0718,
"step": 7720
},
{
"epoch": 1.67,
"learning_rate": 3.912471996407709e-05,
"loss": 3.0234,
"step": 7730
},
{
"epoch": 1.67,
"learning_rate": 3.9095859916620296e-05,
"loss": 3.0262,
"step": 7740
},
{
"epoch": 1.67,
"learning_rate": 3.906697230286821e-05,
"loss": 3.004,
"step": 7750
},
{
"epoch": 1.68,
"learning_rate": 3.903805717931435e-05,
"loss": 3.0779,
"step": 7760
},
{
"epoch": 1.68,
"learning_rate": 3.9014905311411966e-05,
"loss": 2.989,
"step": 7770
},
{
"epoch": 1.68,
"learning_rate": 3.8985940812750175e-05,
"loss": 3.0635,
"step": 7780
},
{
"epoch": 1.68,
"learning_rate": 3.895984937694845e-05,
"loss": 3.0626,
"step": 7790
},
{
"epoch": 1.68,
"learning_rate": 3.893083295922656e-05,
"loss": 3.0518,
"step": 7800
},
{
"epoch": 1.69,
"learning_rate": 3.890178929794116e-05,
"loss": 3.0396,
"step": 7810
},
{
"epoch": 1.69,
"learning_rate": 3.887271844989092e-05,
"loss": 3.016,
"step": 7820
},
{
"epoch": 1.69,
"learning_rate": 3.8843620471927676e-05,
"loss": 3.0663,
"step": 7830
},
{
"epoch": 1.69,
"learning_rate": 3.8814495420956346e-05,
"loss": 3.0572,
"step": 7840
},
{
"epoch": 1.69,
"learning_rate": 3.878534335393476e-05,
"loss": 3.0316,
"step": 7850
},
{
"epoch": 1.7,
"learning_rate": 3.875616432787358e-05,
"loss": 3.0399,
"step": 7860
},
{
"epoch": 1.7,
"learning_rate": 3.872695839983622e-05,
"loss": 3.04,
"step": 7870
},
{
"epoch": 1.7,
"learning_rate": 3.8697725626938675e-05,
"loss": 3.0146,
"step": 7880
},
{
"epoch": 1.7,
"learning_rate": 3.8668466066349464e-05,
"loss": 3.0186,
"step": 7890
},
{
"epoch": 1.71,
"learning_rate": 3.863917977528947e-05,
"loss": 3.0498,
"step": 7900
},
{
"epoch": 1.71,
"learning_rate": 3.860986681103187e-05,
"loss": 3.0079,
"step": 7910
},
{
"epoch": 1.71,
"learning_rate": 3.8580527230901976e-05,
"loss": 3.0093,
"step": 7920
},
{
"epoch": 1.71,
"learning_rate": 3.855116109227718e-05,
"loss": 3.0021,
"step": 7930
},
{
"epoch": 1.71,
"learning_rate": 3.8521768452586814e-05,
"loss": 3.0234,
"step": 7940
},
{
"epoch": 1.72,
"learning_rate": 3.8492349369312e-05,
"loss": 2.9796,
"step": 7950
},
{
"epoch": 1.72,
"learning_rate": 3.846290389998563e-05,
"loss": 3.0799,
"step": 7960
},
{
"epoch": 1.72,
"learning_rate": 3.843343210219213e-05,
"loss": 3.0204,
"step": 7970
},
{
"epoch": 1.72,
"learning_rate": 3.840393403356747e-05,
"loss": 2.9962,
"step": 7980
},
{
"epoch": 1.72,
"learning_rate": 3.837440975179898e-05,
"loss": 3.0499,
"step": 7990
},
{
"epoch": 1.73,
"learning_rate": 3.834781553368924e-05,
"loss": 3.0054,
"step": 8000
},
{
"epoch": 1.73,
"learning_rate": 3.832120017182745e-05,
"loss": 3.0406,
"step": 8010
},
{
"epoch": 1.73,
"learning_rate": 3.829160280058871e-05,
"loss": 3.0324,
"step": 8020
},
{
"epoch": 1.73,
"learning_rate": 3.8261979435884533e-05,
"loss": 3.0714,
"step": 8030
},
{
"epoch": 1.74,
"learning_rate": 3.823233013564726e-05,
"loss": 3.089,
"step": 8040
},
{
"epoch": 1.74,
"learning_rate": 3.8202654957860005e-05,
"loss": 3.0076,
"step": 8050
},
{
"epoch": 1.74,
"learning_rate": 3.817592522051002e-05,
"loss": 3.06,
"step": 8060
},
{
"epoch": 1.74,
"learning_rate": 3.814620103530231e-05,
"loss": 3.0004,
"step": 8070
},
{
"epoch": 1.74,
"learning_rate": 3.8116451140981295e-05,
"loss": 3.0538,
"step": 8080
},
{
"epoch": 1.75,
"learning_rate": 3.80866755957268e-05,
"loss": 3.0364,
"step": 8090
},
{
"epoch": 1.75,
"learning_rate": 3.805687445776878e-05,
"loss": 3.0766,
"step": 8100
},
{
"epoch": 1.75,
"learning_rate": 3.802704778538725e-05,
"loss": 3.0487,
"step": 8110
},
{
"epoch": 1.75,
"learning_rate": 3.7997195636912177e-05,
"loss": 2.9465,
"step": 8120
},
{
"epoch": 1.76,
"learning_rate": 3.7967318070723335e-05,
"loss": 2.9839,
"step": 8130
},
{
"epoch": 1.76,
"learning_rate": 3.793741514525019e-05,
"loss": 3.0199,
"step": 8140
},
{
"epoch": 1.76,
"learning_rate": 3.790748691897183e-05,
"loss": 3.024,
"step": 8150
},
{
"epoch": 1.76,
"learning_rate": 3.787753345041683e-05,
"loss": 3.0038,
"step": 8160
},
{
"epoch": 1.76,
"learning_rate": 3.7847554798163084e-05,
"loss": 3.0542,
"step": 8170
},
{
"epoch": 1.77,
"learning_rate": 3.781755102083777e-05,
"loss": 3.0323,
"step": 8180
},
{
"epoch": 1.77,
"learning_rate": 3.7787522177117195e-05,
"loss": 2.9868,
"step": 8190
},
{
"epoch": 1.77,
"learning_rate": 3.775746832572669e-05,
"loss": 3.0252,
"step": 8200
},
{
"epoch": 1.77,
"learning_rate": 3.7727389525440474e-05,
"loss": 2.9854,
"step": 8210
},
{
"epoch": 1.77,
"learning_rate": 3.769728583508157e-05,
"loss": 3.0464,
"step": 8220
},
{
"epoch": 1.78,
"learning_rate": 3.7667157313521696e-05,
"loss": 3.0435,
"step": 8230
},
{
"epoch": 1.78,
"learning_rate": 3.763700401968109e-05,
"loss": 3.0005,
"step": 8240
},
{
"epoch": 1.78,
"learning_rate": 3.7606826012528476e-05,
"loss": 3.051,
"step": 8250
},
{
"epoch": 1.78,
"learning_rate": 3.7579644724985924e-05,
"loss": 3.0435,
"step": 8260
},
{
"epoch": 1.79,
"learning_rate": 3.755244351052064e-05,
"loss": 2.978,
"step": 8270
},
{
"epoch": 1.79,
"learning_rate": 3.752219662021882e-05,
"loss": 2.9811,
"step": 8280
},
{
"epoch": 1.79,
"learning_rate": 3.749192524112586e-05,
"loss": 3.0102,
"step": 8290
},
{
"epoch": 1.79,
"learning_rate": 3.746162943244138e-05,
"loss": 2.9856,
"step": 8300
},
{
"epoch": 1.79,
"learning_rate": 3.7431309253412796e-05,
"loss": 3.0353,
"step": 8310
},
{
"epoch": 1.8,
"learning_rate": 3.7400964763335174e-05,
"loss": 3.0231,
"step": 8320
},
{
"epoch": 1.8,
"learning_rate": 3.737059602155113e-05,
"loss": 3.0425,
"step": 8330
},
{
"epoch": 1.8,
"learning_rate": 3.734020308745071e-05,
"loss": 2.9911,
"step": 8340
},
{
"epoch": 1.8,
"learning_rate": 3.730978602047126e-05,
"loss": 2.9758,
"step": 8350
},
{
"epoch": 1.8,
"learning_rate": 3.727934488009734e-05,
"loss": 3.0029,
"step": 8360
},
{
"epoch": 1.81,
"learning_rate": 3.7248879725860565e-05,
"loss": 2.9973,
"step": 8370
},
{
"epoch": 1.81,
"learning_rate": 3.722144060443545e-05,
"loss": 3.0256,
"step": 8380
},
{
"epoch": 1.81,
"learning_rate": 3.7193982123504736e-05,
"loss": 3.0528,
"step": 8390
},
{
"epoch": 1.81,
"learning_rate": 3.7163450047559525e-05,
"loss": 3.0199,
"step": 8400
},
{
"epoch": 1.82,
"learning_rate": 3.713289418439894e-05,
"loss": 3.0149,
"step": 8410
},
{
"epoch": 1.82,
"learning_rate": 3.710231459377896e-05,
"loss": 2.9873,
"step": 8420
},
{
"epoch": 1.82,
"learning_rate": 3.707171133550199e-05,
"loss": 3.006,
"step": 8430
},
{
"epoch": 1.82,
"learning_rate": 3.704108446941668e-05,
"loss": 3.0419,
"step": 8440
},
{
"epoch": 1.82,
"learning_rate": 3.701043405541786e-05,
"loss": 3.0296,
"step": 8450
},
{
"epoch": 1.83,
"learning_rate": 3.697976015344645e-05,
"loss": 3.0332,
"step": 8460
},
{
"epoch": 1.83,
"learning_rate": 3.6949062823489244e-05,
"loss": 3.025,
"step": 8470
},
{
"epoch": 1.83,
"learning_rate": 3.692141524521594e-05,
"loss": 3.0212,
"step": 8480
},
{
"epoch": 1.83,
"learning_rate": 3.689067356751343e-05,
"loss": 3.0744,
"step": 8490
},
{
"epoch": 1.84,
"learning_rate": 3.685990863604559e-05,
"loss": 3.0107,
"step": 8500
},
{
"epoch": 1.84,
"learning_rate": 3.682912051097726e-05,
"loss": 3.0283,
"step": 8510
},
{
"epoch": 1.84,
"learning_rate": 3.6798309252518636e-05,
"loss": 3.0086,
"step": 8520
},
{
"epoch": 1.84,
"learning_rate": 3.676747492092516e-05,
"loss": 3.0201,
"step": 8530
},
{
"epoch": 1.84,
"learning_rate": 3.67366175764974e-05,
"loss": 3.0044,
"step": 8540
},
{
"epoch": 1.85,
"learning_rate": 3.670573727958093e-05,
"loss": 3.0063,
"step": 8550
},
{
"epoch": 1.85,
"learning_rate": 3.667483409056618e-05,
"loss": 3.0278,
"step": 8560
},
{
"epoch": 1.85,
"learning_rate": 3.66439080698884e-05,
"loss": 3.0844,
"step": 8570
},
{
"epoch": 1.85,
"learning_rate": 3.661295927802745e-05,
"loss": 2.9546,
"step": 8580
},
{
"epoch": 1.85,
"learning_rate": 3.658198777550774e-05,
"loss": 3.0687,
"step": 8590
},
{
"epoch": 1.86,
"learning_rate": 3.655099362289808e-05,
"loss": 3.0078,
"step": 8600
},
{
"epoch": 1.86,
"learning_rate": 3.651997688081159e-05,
"loss": 3.0645,
"step": 8610
},
{
"epoch": 1.86,
"learning_rate": 3.6488937609905555e-05,
"loss": 3.0123,
"step": 8620
},
{
"epoch": 1.86,
"learning_rate": 3.645787587088133e-05,
"loss": 3.0145,
"step": 8630
},
{
"epoch": 1.87,
"learning_rate": 3.64267917244842e-05,
"loss": 3.0487,
"step": 8640
},
{
"epoch": 1.87,
"learning_rate": 3.639568523150327e-05,
"loss": 3.0715,
"step": 8650
},
{
"epoch": 1.87,
"learning_rate": 3.636455645277137e-05,
"loss": 3.0261,
"step": 8660
},
{
"epoch": 1.87,
"learning_rate": 3.633340544916487e-05,
"loss": 2.9886,
"step": 8670
},
{
"epoch": 1.87,
"learning_rate": 3.630223228160363e-05,
"loss": 3.0127,
"step": 8680
},
{
"epoch": 1.88,
"learning_rate": 3.627727783047304e-05,
"loss": 3.0138,
"step": 8690
},
{
"epoch": 1.88,
"learning_rate": 3.624918720305117e-05,
"loss": 2.9864,
"step": 8700
},
{
"epoch": 1.88,
"learning_rate": 3.621795449744562e-05,
"loss": 3.0469,
"step": 8710
},
{
"epoch": 1.88,
"learning_rate": 3.6186699853664755e-05,
"loss": 3.0342,
"step": 8720
},
{
"epoch": 1.88,
"learning_rate": 3.615542333283112e-05,
"loss": 2.9967,
"step": 8730
},
{
"epoch": 1.89,
"learning_rate": 3.6124124996110015e-05,
"loss": 2.9726,
"step": 8740
},
{
"epoch": 1.89,
"learning_rate": 3.609280490470944e-05,
"loss": 3.005,
"step": 8750
},
{
"epoch": 1.89,
"learning_rate": 3.6061463119879915e-05,
"loss": 3.0652,
"step": 8760
},
{
"epoch": 1.89,
"learning_rate": 3.60300997029144e-05,
"loss": 3.032,
"step": 8770
},
{
"epoch": 1.9,
"learning_rate": 3.5998714715148165e-05,
"loss": 3.0723,
"step": 8780
},
{
"epoch": 1.9,
"learning_rate": 3.596730821795863e-05,
"loss": 3.0195,
"step": 8790
},
{
"epoch": 1.9,
"learning_rate": 3.59390240306935e-05,
"loss": 3.0324,
"step": 8800
},
{
"epoch": 1.9,
"learning_rate": 3.591072251534864e-05,
"loss": 3.0523,
"step": 8810
},
{
"epoch": 1.9,
"learning_rate": 3.58792561186595e-05,
"loss": 2.9944,
"step": 8820
},
{
"epoch": 1.91,
"learning_rate": 3.584776844616396e-05,
"loss": 3.0669,
"step": 8830
},
{
"epoch": 1.91,
"learning_rate": 3.581625955944028e-05,
"loss": 3.0384,
"step": 8840
},
{
"epoch": 1.91,
"learning_rate": 3.578472952010819e-05,
"loss": 3.0283,
"step": 8850
},
{
"epoch": 1.91,
"learning_rate": 3.5753178389828816e-05,
"loss": 3.0432,
"step": 8860
},
{
"epoch": 1.91,
"learning_rate": 3.5721606230304515e-05,
"loss": 3.0139,
"step": 8870
},
{
"epoch": 1.92,
"learning_rate": 3.569001310327876e-05,
"loss": 3.0053,
"step": 8880
},
{
"epoch": 1.92,
"learning_rate": 3.5658399070536047e-05,
"loss": 3.0054,
"step": 8890
},
{
"epoch": 1.92,
"learning_rate": 3.562676419390174e-05,
"loss": 3.0481,
"step": 8900
},
{
"epoch": 1.92,
"learning_rate": 3.559510853524197e-05,
"loss": 3.0292,
"step": 8910
},
{
"epoch": 1.93,
"learning_rate": 3.5563432156463526e-05,
"loss": 2.9672,
"step": 8920
},
{
"epoch": 1.93,
"learning_rate": 3.5531735119513686e-05,
"loss": 3.0094,
"step": 8930
},
{
"epoch": 1.93,
"learning_rate": 3.550319017475429e-05,
"loss": 2.9928,
"step": 8940
},
{
"epoch": 1.93,
"learning_rate": 3.547145405808821e-05,
"loss": 3.0295,
"step": 8950
},
{
"epoch": 1.93,
"learning_rate": 3.544287404237584e-05,
"loss": 3.0054,
"step": 8960
},
{
"epoch": 1.94,
"learning_rate": 3.54110990700449e-05,
"loss": 2.9728,
"step": 8970
},
{
"epoch": 1.94,
"learning_rate": 3.5379303737449825e-05,
"loss": 3.0396,
"step": 8980
},
{
"epoch": 1.94,
"learning_rate": 3.534748810677054e-05,
"loss": 2.9779,
"step": 8990
},
{
"epoch": 1.94,
"learning_rate": 3.531565224022666e-05,
"loss": 3.0415,
"step": 9000
},
{
"epoch": 1.95,
"learning_rate": 3.528379620007739e-05,
"loss": 3.0176,
"step": 9010
},
{
"epoch": 1.95,
"learning_rate": 3.525192004862137e-05,
"loss": 3.0491,
"step": 9020
},
{
"epoch": 1.95,
"learning_rate": 3.5220023848196575e-05,
"loss": 3.0131,
"step": 9030
},
{
"epoch": 1.95,
"learning_rate": 3.51881076611802e-05,
"loss": 3.0051,
"step": 9040
},
{
"epoch": 1.95,
"learning_rate": 3.5156171549988516e-05,
"loss": 3.019,
"step": 9050
},
{
"epoch": 1.96,
"learning_rate": 3.512421557707675e-05,
"loss": 2.9598,
"step": 9060
},
{
"epoch": 1.96,
"learning_rate": 3.5092239804938983e-05,
"loss": 2.9848,
"step": 9070
},
{
"epoch": 1.96,
"learning_rate": 3.5060244296108035e-05,
"loss": 3.0217,
"step": 9080
},
{
"epoch": 1.96,
"learning_rate": 3.502822911315528e-05,
"loss": 3.0102,
"step": 9090
},
{
"epoch": 1.96,
"learning_rate": 3.49961943186906e-05,
"loss": 2.9591,
"step": 9100
},
{
"epoch": 1.97,
"learning_rate": 3.4964139975362206e-05,
"loss": 3.0149,
"step": 9110
},
{
"epoch": 1.97,
"learning_rate": 3.493206614585658e-05,
"loss": 3.0396,
"step": 9120
},
{
"epoch": 1.97,
"learning_rate": 3.489997289289827e-05,
"loss": 3.0442,
"step": 9130
},
{
"epoch": 1.97,
"learning_rate": 3.486786027924981e-05,
"loss": 2.9807,
"step": 9140
},
{
"epoch": 1.98,
"learning_rate": 3.483572836771164e-05,
"loss": 3.0338,
"step": 9150
},
{
"epoch": 1.98,
"learning_rate": 3.480357722112189e-05,
"loss": 2.9899,
"step": 9160
},
{
"epoch": 1.98,
"learning_rate": 3.478106000732185e-05,
"loss": 2.9718,
"step": 9170
},
{
"epoch": 1.98,
"learning_rate": 3.4748876305464383e-05,
"loss": 2.9592,
"step": 9180
},
{
"epoch": 1.98,
"learning_rate": 3.47166735384059e-05,
"loss": 2.9987,
"step": 9190
},
{
"epoch": 1.99,
"learning_rate": 3.468445176912311e-05,
"loss": 2.9839,
"step": 9200
},
{
"epoch": 1.99,
"learning_rate": 3.465221106062989e-05,
"loss": 3.0143,
"step": 9210
},
{
"epoch": 1.99,
"learning_rate": 3.461995147597717e-05,
"loss": 2.9697,
"step": 9220
},
{
"epoch": 1.99,
"learning_rate": 3.4587673078252767e-05,
"loss": 3.0109,
"step": 9230
},
{
"epoch": 1.99,
"learning_rate": 3.4555375930581305e-05,
"loss": 3.0099,
"step": 9240
},
{
"epoch": 2.0,
"learning_rate": 3.452306009612408e-05,
"loss": 2.9753,
"step": 9250
},
{
"epoch": 2.0,
"learning_rate": 3.449072563807894e-05,
"loss": 3.0343,
"step": 9260
},
{
"epoch": 2.0,
"eval_loss": 3.0047240257263184,
"eval_runtime": 269.2531,
"eval_samples_per_second": 550.486,
"eval_steps_per_second": 17.203,
"step": 9264
},
{
"epoch": 2.0,
"learning_rate": 3.4458372619680114e-05,
"loss": 3.0603,
"step": 9270
},
{
"epoch": 2.0,
"learning_rate": 3.4426001104198166e-05,
"loss": 2.9915,
"step": 9280
},
{
"epoch": 2.01,
"learning_rate": 3.439361115493983e-05,
"loss": 3.0188,
"step": 9290
},
{
"epoch": 2.01,
"learning_rate": 3.436120283524788e-05,
"loss": 2.9671,
"step": 9300
},
{
"epoch": 2.01,
"learning_rate": 3.432877620850101e-05,
"loss": 2.9702,
"step": 9310
},
{
"epoch": 2.01,
"learning_rate": 3.4296331338113744e-05,
"loss": 2.9766,
"step": 9320
},
{
"epoch": 2.01,
"learning_rate": 3.426386828753624e-05,
"loss": 2.974,
"step": 9330
},
{
"epoch": 2.02,
"learning_rate": 3.423138712025425e-05,
"loss": 2.9614,
"step": 9340
},
{
"epoch": 2.02,
"learning_rate": 3.419888789978893e-05,
"loss": 3.0015,
"step": 9350
},
{
"epoch": 2.02,
"learning_rate": 3.416637068969675e-05,
"loss": 2.9861,
"step": 9360
},
{
"epoch": 2.02,
"learning_rate": 3.4133835553569374e-05,
"loss": 3.0064,
"step": 9370
},
{
"epoch": 2.03,
"learning_rate": 3.410779458067809e-05,
"loss": 2.9706,
"step": 9380
},
{
"epoch": 2.03,
"learning_rate": 3.4075227338050133e-05,
"loss": 2.9809,
"step": 9390
},
{
"epoch": 2.03,
"learning_rate": 3.404264234762971e-05,
"loss": 2.9755,
"step": 9400
},
{
"epoch": 2.03,
"learning_rate": 3.4010039673141e-05,
"loss": 2.9848,
"step": 9410
},
{
"epoch": 2.03,
"learning_rate": 3.397741937834282e-05,
"loss": 3.0204,
"step": 9420
},
{
"epoch": 2.04,
"learning_rate": 3.39447815270284e-05,
"loss": 3.0066,
"step": 9430
},
{
"epoch": 2.04,
"learning_rate": 3.3912126183025336e-05,
"loss": 2.9777,
"step": 9440
},
{
"epoch": 2.04,
"learning_rate": 3.387945341019541e-05,
"loss": 3.0069,
"step": 9450
},
{
"epoch": 2.04,
"learning_rate": 3.384676327243449e-05,
"loss": 3.0422,
"step": 9460
},
{
"epoch": 2.04,
"learning_rate": 3.381405583367242e-05,
"loss": 2.987,
"step": 9470
},
{
"epoch": 2.05,
"learning_rate": 3.3781331157872866e-05,
"loss": 3.006,
"step": 9480
},
{
"epoch": 2.05,
"learning_rate": 3.3748589309033216e-05,
"loss": 2.9713,
"step": 9490
},
{
"epoch": 2.05,
"learning_rate": 3.371910701504913e-05,
"loss": 3.0123,
"step": 9500
},
{
"epoch": 2.05,
"learning_rate": 3.368961090946896e-05,
"loss": 3.0056,
"step": 9510
},
{
"epoch": 2.06,
"learning_rate": 3.366010103901633e-05,
"loss": 2.9859,
"step": 9520
},
{
"epoch": 2.06,
"learning_rate": 3.3627296206938916e-05,
"loss": 3.0268,
"step": 9530
},
{
"epoch": 2.06,
"learning_rate": 3.359447450305687e-05,
"loss": 2.9677,
"step": 9540
},
{
"epoch": 2.06,
"learning_rate": 3.356163599155732e-05,
"loss": 3.0107,
"step": 9550
},
{
"epoch": 2.06,
"learning_rate": 3.3528780736660246e-05,
"loss": 2.9681,
"step": 9560
},
{
"epoch": 2.07,
"learning_rate": 3.3495908802618416e-05,
"loss": 2.982,
"step": 9570
},
{
"epoch": 2.07,
"learning_rate": 3.346302025371717e-05,
"loss": 2.9877,
"step": 9580
},
{
"epoch": 2.07,
"learning_rate": 3.3430115154274364e-05,
"loss": 3.0147,
"step": 9590
},
{
"epoch": 2.07,
"learning_rate": 3.339719356864024e-05,
"loss": 2.9518,
"step": 9600
},
{
"epoch": 2.07,
"learning_rate": 3.336425556119723e-05,
"loss": 3.0058,
"step": 9610
},
{
"epoch": 2.08,
"learning_rate": 3.333130119635993e-05,
"loss": 2.9761,
"step": 9620
},
{
"epoch": 2.08,
"learning_rate": 3.32983305385749e-05,
"loss": 3.0012,
"step": 9630
},
{
"epoch": 2.08,
"learning_rate": 3.326534365232057e-05,
"loss": 3.0088,
"step": 9640
},
{
"epoch": 2.08,
"learning_rate": 3.3232340602107095e-05,
"loss": 2.9718,
"step": 9650
},
{
"epoch": 2.09,
"learning_rate": 3.3199321452476256e-05,
"loss": 2.9996,
"step": 9660
},
{
"epoch": 2.09,
"learning_rate": 3.316628626800131e-05,
"loss": 2.9885,
"step": 9670
},
{
"epoch": 2.09,
"learning_rate": 3.313323511328688e-05,
"loss": 3.0582,
"step": 9680
},
{
"epoch": 2.09,
"learning_rate": 3.3103475472910025e-05,
"loss": 2.9904,
"step": 9690
},
{
"epoch": 2.09,
"learning_rate": 3.3070394152838054e-05,
"loss": 3.0061,
"step": 9700
},
{
"epoch": 2.1,
"learning_rate": 3.304060746871186e-05,
"loss": 3.0012,
"step": 9710
},
{
"epoch": 2.1,
"learning_rate": 3.3007496216830624e-05,
"loss": 2.9829,
"step": 9720
},
{
"epoch": 2.1,
"learning_rate": 3.2974369305244595e-05,
"loss": 2.9854,
"step": 9730
},
{
"epoch": 2.1,
"learning_rate": 3.294122679873777e-05,
"loss": 2.9801,
"step": 9740
},
{
"epoch": 2.1,
"learning_rate": 3.2908068762124654e-05,
"loss": 3.0196,
"step": 9750
},
{
"epoch": 2.11,
"learning_rate": 3.287489526025011e-05,
"loss": 3.0084,
"step": 9760
},
{
"epoch": 2.11,
"learning_rate": 3.2841706357989264e-05,
"loss": 2.9887,
"step": 9770
},
{
"epoch": 2.11,
"learning_rate": 3.280850212024733e-05,
"loss": 2.9953,
"step": 9780
},
{
"epoch": 2.11,
"learning_rate": 3.2775282611959554e-05,
"loss": 3.0151,
"step": 9790
},
{
"epoch": 2.12,
"learning_rate": 3.274204789809102e-05,
"loss": 3.0489,
"step": 9800
},
{
"epoch": 2.12,
"learning_rate": 3.2708798043636544e-05,
"loss": 3.0269,
"step": 9810
},
{
"epoch": 2.12,
"learning_rate": 3.2675533113620565e-05,
"loss": 2.9857,
"step": 9820
},
{
"epoch": 2.12,
"learning_rate": 3.264225317309699e-05,
"loss": 2.9685,
"step": 9830
},
{
"epoch": 2.12,
"learning_rate": 3.2608958287149106e-05,
"loss": 2.9975,
"step": 9840
},
{
"epoch": 2.13,
"learning_rate": 3.2578980165273095e-05,
"loss": 3.0332,
"step": 9850
},
{
"epoch": 2.13,
"learning_rate": 3.2545657062427987e-05,
"loss": 3.0324,
"step": 9860
},
{
"epoch": 2.13,
"learning_rate": 3.2512319203064876e-05,
"loss": 2.9602,
"step": 9870
},
{
"epoch": 2.13,
"learning_rate": 3.24789666523803e-05,
"loss": 2.9911,
"step": 9880
},
{
"epoch": 2.14,
"learning_rate": 3.2445599475599524e-05,
"loss": 2.9779,
"step": 9890
},
{
"epoch": 2.14,
"learning_rate": 3.241555656511631e-05,
"loss": 3.0668,
"step": 9900
},
{
"epoch": 2.14,
"learning_rate": 3.238216177855071e-05,
"loss": 2.969,
"step": 9910
},
{
"epoch": 2.14,
"learning_rate": 3.234875255520349e-05,
"loss": 3.0474,
"step": 9920
},
{
"epoch": 2.14,
"learning_rate": 3.231532896041075e-05,
"loss": 3.0185,
"step": 9930
},
{
"epoch": 2.15,
"learning_rate": 3.2281891059536694e-05,
"loss": 2.9814,
"step": 9940
},
{
"epoch": 2.15,
"learning_rate": 3.22484389179735e-05,
"loss": 2.9767,
"step": 9950
},
{
"epoch": 2.15,
"learning_rate": 3.22149726011412e-05,
"loss": 2.998,
"step": 9960
},
{
"epoch": 2.15,
"learning_rate": 3.218149217448756e-05,
"loss": 3.0276,
"step": 9970
},
{
"epoch": 2.15,
"learning_rate": 3.2147997703487917e-05,
"loss": 2.9853,
"step": 9980
},
{
"epoch": 2.16,
"learning_rate": 3.211448925364509e-05,
"loss": 3.0142,
"step": 9990
},
{
"epoch": 2.16,
"learning_rate": 3.2080966890489214e-05,
"loss": 2.9912,
"step": 10000
},
{
"epoch": 2.16,
"learning_rate": 3.204743067957767e-05,
"loss": 3.0114,
"step": 10010
},
{
"epoch": 2.16,
"learning_rate": 3.201388068649489e-05,
"loss": 3.0503,
"step": 10020
},
{
"epoch": 2.17,
"learning_rate": 3.1980316976852266e-05,
"loss": 3.0349,
"step": 10030
},
{
"epoch": 2.17,
"learning_rate": 3.1946739616288017e-05,
"loss": 2.9529,
"step": 10040
},
{
"epoch": 2.17,
"learning_rate": 3.191314867046706e-05,
"loss": 2.9701,
"step": 10050
},
{
"epoch": 2.17,
"learning_rate": 3.1882905258127185e-05,
"loss": 3.0494,
"step": 10060
},
{
"epoch": 2.17,
"learning_rate": 3.1849288681320364e-05,
"loss": 2.9899,
"step": 10070
},
{
"epoch": 2.18,
"learning_rate": 3.181565870983488e-05,
"loss": 2.9895,
"step": 10080
},
{
"epoch": 2.18,
"learning_rate": 3.178201540943852e-05,
"loss": 3.0241,
"step": 10090
},
{
"epoch": 2.18,
"learning_rate": 3.1748358845925144e-05,
"loss": 3.0096,
"step": 10100
},
{
"epoch": 2.18,
"learning_rate": 3.17180566531976e-05,
"loss": 2.9691,
"step": 10110
},
{
"epoch": 2.18,
"learning_rate": 3.1684375071116814e-05,
"loss": 2.9573,
"step": 10120
},
{
"epoch": 2.19,
"learning_rate": 3.165068041686746e-05,
"loss": 3.0214,
"step": 10130
},
{
"epoch": 2.19,
"learning_rate": 3.161697275634382e-05,
"loss": 2.9314,
"step": 10140
},
{
"epoch": 2.19,
"learning_rate": 3.1583252155465655e-05,
"loss": 3.0182,
"step": 10150
},
{
"epoch": 2.19,
"learning_rate": 3.154951868017799e-05,
"loss": 2.9806,
"step": 10160
},
{
"epoch": 2.2,
"learning_rate": 3.151577239645104e-05,
"loss": 3.0227,
"step": 10170
},
{
"epoch": 2.2,
"learning_rate": 3.148201337028007e-05,
"loss": 3.0074,
"step": 10180
},
{
"epoch": 2.2,
"learning_rate": 3.144824166768526e-05,
"loss": 2.9441,
"step": 10190
},
{
"epoch": 2.2,
"learning_rate": 3.14144573547116e-05,
"loss": 2.9978,
"step": 10200
},
{
"epoch": 2.2,
"learning_rate": 3.1380660497428724e-05,
"loss": 3.0041,
"step": 10210
},
{
"epoch": 2.21,
"learning_rate": 3.134685116193079e-05,
"loss": 2.9992,
"step": 10220
},
{
"epoch": 2.21,
"learning_rate": 3.131302941433639e-05,
"loss": 2.9787,
"step": 10230
},
{
"epoch": 2.21,
"learning_rate": 3.127919532078835e-05,
"loss": 2.9329,
"step": 10240
},
{
"epoch": 2.21,
"learning_rate": 3.1245348947453665e-05,
"loss": 3.0237,
"step": 10250
},
{
"epoch": 2.22,
"learning_rate": 3.1211490360523335e-05,
"loss": 2.9839,
"step": 10260
},
{
"epoch": 2.22,
"learning_rate": 3.117761962621226e-05,
"loss": 3.0036,
"step": 10270
},
{
"epoch": 2.22,
"learning_rate": 3.1143736810759076e-05,
"loss": 2.9802,
"step": 10280
},
{
"epoch": 2.22,
"learning_rate": 3.111323200224e-05,
"loss": 2.9766,
"step": 10290
},
{
"epoch": 2.22,
"learning_rate": 3.1079326415188795e-05,
"loss": 3.0417,
"step": 10300
},
{
"epoch": 2.23,
"learning_rate": 3.10454089392207e-05,
"loss": 3.0298,
"step": 10310
},
{
"epoch": 2.23,
"learning_rate": 3.101147964066577e-05,
"loss": 2.9722,
"step": 10320
},
{
"epoch": 2.23,
"learning_rate": 3.097753858587717e-05,
"loss": 3.0053,
"step": 10330
},
{
"epoch": 2.23,
"learning_rate": 3.094358584123108e-05,
"loss": 2.9709,
"step": 10340
},
{
"epoch": 2.23,
"learning_rate": 3.0909621473126526e-05,
"loss": 2.9854,
"step": 10350
},
{
"epoch": 2.24,
"learning_rate": 3.087564554798526e-05,
"loss": 3.0126,
"step": 10360
},
{
"epoch": 2.24,
"learning_rate": 3.084165813225163e-05,
"loss": 2.9911,
"step": 10370
},
{
"epoch": 2.24,
"learning_rate": 3.0807659292392495e-05,
"loss": 3.0408,
"step": 10380
},
{
"epoch": 2.24,
"learning_rate": 3.077705062384479e-05,
"loss": 2.9707,
"step": 10390
},
{
"epoch": 2.25,
"learning_rate": 3.074303026134319e-05,
"loss": 3.0048,
"step": 10400
},
{
"epoch": 2.25,
"learning_rate": 3.070899866759575e-05,
"loss": 3.0244,
"step": 10410
},
{
"epoch": 2.25,
"learning_rate": 3.06749559091557e-05,
"loss": 2.9753,
"step": 10420
},
{
"epoch": 2.25,
"learning_rate": 3.064090205259811e-05,
"loss": 3.0211,
"step": 10430
},
{
"epoch": 2.25,
"learning_rate": 3.060683716451973e-05,
"loss": 3.0083,
"step": 10440
},
{
"epoch": 2.26,
"learning_rate": 3.0572761311538914e-05,
"loss": 2.9368,
"step": 10450
},
{
"epoch": 2.26,
"learning_rate": 3.0538674560295423e-05,
"loss": 3.0115,
"step": 10460
},
{
"epoch": 2.26,
"learning_rate": 3.0504576977450367e-05,
"loss": 2.9867,
"step": 10470
},
{
"epoch": 2.26,
"learning_rate": 3.0470468629686016e-05,
"loss": 2.9864,
"step": 10480
},
{
"epoch": 2.26,
"learning_rate": 3.0439761967821973e-05,
"loss": 2.97,
"step": 10490
},
{
"epoch": 2.27,
"learning_rate": 3.040563335049593e-05,
"loss": 2.9813,
"step": 10500
},
{
"epoch": 2.27,
"learning_rate": 3.0371494161747795e-05,
"loss": 3.0057,
"step": 10510
},
{
"epoch": 2.27,
"learning_rate": 3.0337344468341185e-05,
"loss": 3.0119,
"step": 10520
},
{
"epoch": 2.27,
"learning_rate": 3.0303184337060296e-05,
"loss": 3.0043,
"step": 10530
},
{
"epoch": 2.28,
"learning_rate": 3.0269013834709736e-05,
"loss": 2.9854,
"step": 10540
},
{
"epoch": 2.28,
"learning_rate": 3.023483302811438e-05,
"loss": 2.9886,
"step": 10550
},
{
"epoch": 2.28,
"learning_rate": 3.020064198411926e-05,
"loss": 2.9693,
"step": 10560
},
{
"epoch": 2.28,
"learning_rate": 3.0166440769589422e-05,
"loss": 2.9823,
"step": 10570
},
{
"epoch": 2.28,
"learning_rate": 3.0132229451409816e-05,
"loss": 2.9687,
"step": 10580
},
{
"epoch": 2.29,
"learning_rate": 3.0101430681723946e-05,
"loss": 3.0119,
"step": 10590
},
{
"epoch": 2.29,
"learning_rate": 3.007062383106144e-05,
"loss": 2.9998,
"step": 10600
},
{
"epoch": 2.29,
"learning_rate": 3.003638457865806e-05,
"loss": 3.0144,
"step": 10610
},
{
"epoch": 2.29,
"learning_rate": 3.0005560828428024e-05,
"loss": 3.0269,
"step": 10620
},
{
"epoch": 2.29,
"learning_rate": 2.9971302919605776e-05,
"loss": 3.0248,
"step": 10630
},
{
"epoch": 2.3,
"learning_rate": 2.9937035288751313e-05,
"loss": 2.9338,
"step": 10640
},
{
"epoch": 2.3,
"learning_rate": 2.990275800287947e-05,
"loss": 3.0118,
"step": 10650
},
{
"epoch": 2.3,
"learning_rate": 2.9868471129023946e-05,
"loss": 3.0047,
"step": 10660
},
{
"epoch": 2.3,
"learning_rate": 2.9834174734237207e-05,
"loss": 2.9957,
"step": 10670
},
{
"epoch": 2.31,
"learning_rate": 2.9799868885590332e-05,
"loss": 2.9789,
"step": 10680
},
{
"epoch": 2.31,
"learning_rate": 2.9765553650172885e-05,
"loss": 2.9715,
"step": 10690
},
{
"epoch": 2.31,
"learning_rate": 2.9731229095092794e-05,
"loss": 3.0005,
"step": 10700
},
{
"epoch": 2.31,
"learning_rate": 2.9696895287476213e-05,
"loss": 3.0225,
"step": 10710
},
{
"epoch": 2.31,
"learning_rate": 2.9662552294467377e-05,
"loss": 3.0478,
"step": 10720
},
{
"epoch": 2.32,
"learning_rate": 2.9628200183228506e-05,
"loss": 3.0138,
"step": 10730
},
{
"epoch": 2.32,
"learning_rate": 2.9593839020939635e-05,
"loss": 3.0271,
"step": 10740
},
{
"epoch": 2.32,
"learning_rate": 2.9559468874798514e-05,
"loss": 2.9906,
"step": 10750
},
{
"epoch": 2.32,
"learning_rate": 2.952508981202044e-05,
"loss": 3.0068,
"step": 10760
},
{
"epoch": 2.33,
"learning_rate": 2.949070189983817e-05,
"loss": 3.01,
"step": 10770
},
{
"epoch": 2.33,
"learning_rate": 2.9456305205501755e-05,
"loss": 2.9406,
"step": 10780
},
{
"epoch": 2.33,
"learning_rate": 2.942189979627842e-05,
"loss": 2.9881,
"step": 10790
},
{
"epoch": 2.33,
"learning_rate": 2.9387485739452443e-05,
"loss": 2.9792,
"step": 10800
},
{
"epoch": 2.33,
"learning_rate": 2.9356505750232883e-05,
"loss": 2.9994,
"step": 10810
},
{
"epoch": 2.34,
"learning_rate": 2.9322075448390518e-05,
"loss": 2.9992,
"step": 10820
},
{
"epoch": 2.34,
"learning_rate": 2.9287636694165055e-05,
"loss": 3.0451,
"step": 10830
},
{
"epoch": 2.34,
"learning_rate": 2.925318955490598e-05,
"loss": 3.0286,
"step": 10840
},
{
"epoch": 2.34,
"learning_rate": 2.921873409797918e-05,
"loss": 2.9811,
"step": 10850
},
{
"epoch": 2.34,
"learning_rate": 2.9184270390766787e-05,
"loss": 2.9958,
"step": 10860
},
{
"epoch": 2.35,
"learning_rate": 2.9149798500667102e-05,
"loss": 2.9844,
"step": 10870
},
{
"epoch": 2.35,
"learning_rate": 2.9115318495094395e-05,
"loss": 2.9369,
"step": 10880
},
{
"epoch": 2.35,
"learning_rate": 2.9084279607080227e-05,
"loss": 3.0285,
"step": 10890
},
{
"epoch": 2.35,
"learning_rate": 2.9049784367891875e-05,
"loss": 2.9642,
"step": 10900
},
{
"epoch": 2.36,
"learning_rate": 2.90152812088212e-05,
"loss": 2.9874,
"step": 10910
},
{
"epoch": 2.36,
"learning_rate": 2.8980770197343644e-05,
"loss": 2.986,
"step": 10920
},
{
"epoch": 2.36,
"learning_rate": 2.894625140094999e-05,
"loss": 2.9846,
"step": 10930
},
{
"epoch": 2.36,
"learning_rate": 2.8911724887146265e-05,
"loss": 2.9195,
"step": 10940
},
{
"epoch": 2.36,
"learning_rate": 2.887719072345357e-05,
"loss": 3.032,
"step": 10950
},
{
"epoch": 2.37,
"learning_rate": 2.8842648977407966e-05,
"loss": 2.926,
"step": 10960
},
{
"epoch": 2.37,
"learning_rate": 2.880809971656036e-05,
"loss": 2.9755,
"step": 10970
},
{
"epoch": 2.37,
"learning_rate": 2.8773543008476346e-05,
"loss": 2.9796,
"step": 10980
},
{
"epoch": 2.37,
"learning_rate": 2.873897892073607e-05,
"loss": 2.928,
"step": 10990
},
{
"epoch": 2.37,
"learning_rate": 2.870786498803035e-05,
"loss": 3.0096,
"step": 11000
},
{
"epoch": 2.38,
"learning_rate": 2.8673287065178108e-05,
"loss": 3.0241,
"step": 11010
},
{
"epoch": 2.38,
"learning_rate": 2.8638701958733188e-05,
"loss": 2.9953,
"step": 11020
},
{
"epoch": 2.38,
"learning_rate": 2.860410973633131e-05,
"loss": 2.9796,
"step": 11030
},
{
"epoch": 2.38,
"learning_rate": 2.8572970707938513e-05,
"loss": 2.9776,
"step": 11040
},
{
"epoch": 2.39,
"learning_rate": 2.8538365151604453e-05,
"loss": 2.9794,
"step": 11050
},
{
"epoch": 2.39,
"learning_rate": 2.850375267553515e-05,
"loss": 2.9823,
"step": 11060
},
{
"epoch": 2.39,
"learning_rate": 2.8469133347419835e-05,
"loss": 2.9979,
"step": 11070
},
{
"epoch": 2.39,
"learning_rate": 2.843450723496111e-05,
"loss": 2.9521,
"step": 11080
},
{
"epoch": 2.39,
"learning_rate": 2.8399874405874866e-05,
"loss": 2.9632,
"step": 11090
},
{
"epoch": 2.4,
"learning_rate": 2.8365234927890133e-05,
"loss": 2.9378,
"step": 11100
},
{
"epoch": 2.4,
"learning_rate": 2.833058886874893e-05,
"loss": 2.9747,
"step": 11110
},
{
"epoch": 2.4,
"learning_rate": 2.8295936296206144e-05,
"loss": 2.9673,
"step": 11120
},
{
"epoch": 2.4,
"learning_rate": 2.8261277278029417e-05,
"loss": 3.0191,
"step": 11130
},
{
"epoch": 2.41,
"learning_rate": 2.822661188199898e-05,
"loss": 2.9542,
"step": 11140
},
{
"epoch": 2.41,
"learning_rate": 2.8198875018676247e-05,
"loss": 2.9571,
"step": 11150
},
{
"epoch": 2.41,
"learning_rate": 2.816419831335506e-05,
"loss": 2.9897,
"step": 11160
},
{
"epoch": 2.41,
"learning_rate": 2.812951542003076e-05,
"loss": 2.9851,
"step": 11170
},
{
"epoch": 2.41,
"learning_rate": 2.8094826406530277e-05,
"loss": 3.0083,
"step": 11180
},
{
"epoch": 2.42,
"learning_rate": 2.8060131340692515e-05,
"loss": 2.9346,
"step": 11190
},
{
"epoch": 2.42,
"learning_rate": 2.8025430290368186e-05,
"loss": 2.9595,
"step": 11200
},
{
"epoch": 2.42,
"learning_rate": 2.799072332341975e-05,
"loss": 3.0067,
"step": 11210
},
{
"epoch": 2.42,
"learning_rate": 2.7956010507721193e-05,
"loss": 2.9786,
"step": 11220
},
{
"epoch": 2.42,
"learning_rate": 2.7921291911157975e-05,
"loss": 2.9779,
"step": 11230
},
{
"epoch": 2.43,
"learning_rate": 2.788656760162685e-05,
"loss": 2.9619,
"step": 11240
},
{
"epoch": 2.43,
"learning_rate": 2.7851837647035727e-05,
"loss": 2.9729,
"step": 11250
},
{
"epoch": 2.43,
"learning_rate": 2.7817102115303577e-05,
"loss": 2.9985,
"step": 11260
},
{
"epoch": 2.43,
"learning_rate": 2.778236107436027e-05,
"loss": 2.9571,
"step": 11270
},
{
"epoch": 2.44,
"learning_rate": 2.774761459214645e-05,
"loss": 2.9435,
"step": 11280
},
{
"epoch": 2.44,
"learning_rate": 2.771633816202924e-05,
"loss": 2.9572,
"step": 11290
},
{
"epoch": 2.44,
"learning_rate": 2.768158152861591e-05,
"loss": 2.9507,
"step": 11300
},
{
"epoch": 2.44,
"learning_rate": 2.7646819651019633e-05,
"loss": 2.942,
"step": 11310
},
{
"epoch": 2.44,
"learning_rate": 2.7612052597221804e-05,
"loss": 3.0538,
"step": 11320
},
{
"epoch": 2.45,
"learning_rate": 2.7577280435213927e-05,
"loss": 3.0008,
"step": 11330
},
{
"epoch": 2.45,
"learning_rate": 2.7542503232997514e-05,
"loss": 2.9764,
"step": 11340
},
{
"epoch": 2.45,
"learning_rate": 2.7507721058583924e-05,
"loss": 2.9415,
"step": 11350
},
{
"epoch": 2.45,
"learning_rate": 2.7472933979994236e-05,
"loss": 2.9961,
"step": 11360
},
{
"epoch": 2.45,
"learning_rate": 2.7441621472420127e-05,
"loss": 2.9974,
"step": 11370
},
{
"epoch": 2.46,
"learning_rate": 2.7406825263328206e-05,
"loss": 3.0028,
"step": 11380
},
{
"epoch": 2.46,
"learning_rate": 2.7372024347375097e-05,
"loss": 2.9188,
"step": 11390
},
{
"epoch": 2.46,
"learning_rate": 2.733721879261854e-05,
"loss": 3.0055,
"step": 11400
},
{
"epoch": 2.46,
"learning_rate": 2.730240866712535e-05,
"loss": 3.0241,
"step": 11410
},
{
"epoch": 2.47,
"learning_rate": 2.7267594038971267e-05,
"loss": 3.0057,
"step": 11420
},
{
"epoch": 2.47,
"learning_rate": 2.723277497624085e-05,
"loss": 3.0129,
"step": 11430
},
{
"epoch": 2.47,
"learning_rate": 2.719795154702731e-05,
"loss": 3.0115,
"step": 11440
},
{
"epoch": 2.47,
"learning_rate": 2.716312381943243e-05,
"loss": 2.9474,
"step": 11450
},
{
"epoch": 2.47,
"learning_rate": 2.7128291861566363e-05,
"loss": 2.9598,
"step": 11460
},
{
"epoch": 2.48,
"learning_rate": 2.7093455741547568e-05,
"loss": 2.9912,
"step": 11470
},
{
"epoch": 2.48,
"learning_rate": 2.7058615527502622e-05,
"loss": 2.9949,
"step": 11480
},
{
"epoch": 2.48,
"learning_rate": 2.7023771287566115e-05,
"loss": 3.0112,
"step": 11490
},
{
"epoch": 2.48,
"learning_rate": 2.6988923089880504e-05,
"loss": 2.9989,
"step": 11500
},
{
"epoch": 2.48,
"learning_rate": 2.6954071002595994e-05,
"loss": 3.004,
"step": 11510
},
{
"epoch": 2.49,
"learning_rate": 2.69192150938704e-05,
"loss": 2.9703,
"step": 11520
},
{
"epoch": 2.49,
"learning_rate": 2.689132766125916e-05,
"loss": 2.99,
"step": 11530
},
{
"epoch": 2.49,
"learning_rate": 2.6856465045721057e-05,
"loss": 2.964,
"step": 11540
},
{
"epoch": 2.49,
"learning_rate": 2.682159879962306e-05,
"loss": 2.9806,
"step": 11550
},
{
"epoch": 2.5,
"learning_rate": 2.6786728991150665e-05,
"loss": 2.9763,
"step": 11560
},
{
"epoch": 2.5,
"learning_rate": 2.675185568849636e-05,
"loss": 2.9496,
"step": 11570
},
{
"epoch": 2.5,
"learning_rate": 2.671697895985941e-05,
"loss": 2.9817,
"step": 11580
},
{
"epoch": 2.5,
"learning_rate": 2.6682098873445845e-05,
"loss": 2.9688,
"step": 11590
},
{
"epoch": 2.5,
"learning_rate": 2.664721549746821e-05,
"loss": 2.9217,
"step": 11600
},
{
"epoch": 2.51,
"learning_rate": 2.661232890014551e-05,
"loss": 2.9785,
"step": 11610
},
{
"epoch": 2.51,
"learning_rate": 2.6577439149703042e-05,
"loss": 3.0618,
"step": 11620
},
{
"epoch": 2.51,
"learning_rate": 2.6542546314372268e-05,
"loss": 2.9743,
"step": 11630
},
{
"epoch": 2.51,
"learning_rate": 2.650765046239069e-05,
"loss": 2.9859,
"step": 11640
},
{
"epoch": 2.52,
"learning_rate": 2.6472751662001698e-05,
"loss": 2.9665,
"step": 11650
},
{
"epoch": 2.52,
"learning_rate": 2.6437849981454464e-05,
"loss": 2.9424,
"step": 11660
},
{
"epoch": 2.52,
"learning_rate": 2.6402945489003773e-05,
"loss": 3.0069,
"step": 11670
},
{
"epoch": 2.52,
"learning_rate": 2.6368038252909923e-05,
"loss": 2.9902,
"step": 11680
},
{
"epoch": 2.52,
"learning_rate": 2.6333128341438578e-05,
"loss": 3.0066,
"step": 11690
},
{
"epoch": 2.53,
"learning_rate": 2.629821582286063e-05,
"loss": 2.9378,
"step": 11700
},
{
"epoch": 2.53,
"learning_rate": 2.6263300765452063e-05,
"loss": 2.9884,
"step": 11710
},
{
"epoch": 2.53,
"learning_rate": 2.6228383237493837e-05,
"loss": 2.9975,
"step": 11720
},
{
"epoch": 2.53,
"learning_rate": 2.6200447482219387e-05,
"loss": 2.9469,
"step": 11730
},
{
"epoch": 2.53,
"learning_rate": 2.616901796333863e-05,
"loss": 2.9747,
"step": 11740
},
{
"epoch": 2.54,
"learning_rate": 2.613409410599706e-05,
"loss": 3.0098,
"step": 11750
},
{
"epoch": 2.54,
"learning_rate": 2.6099168030786325e-05,
"loss": 2.9808,
"step": 11760
},
{
"epoch": 2.54,
"learning_rate": 2.6064239806008926e-05,
"loss": 2.9843,
"step": 11770
},
{
"epoch": 2.54,
"learning_rate": 2.602930949997158e-05,
"loss": 2.9828,
"step": 11780
},
{
"epoch": 2.55,
"learning_rate": 2.5994377180985062e-05,
"loss": 2.9412,
"step": 11790
},
{
"epoch": 2.55,
"learning_rate": 2.595944291736408e-05,
"loss": 2.9708,
"step": 11800
},
{
"epoch": 2.55,
"learning_rate": 2.5924506777427143e-05,
"loss": 2.9904,
"step": 11810
},
{
"epoch": 2.55,
"learning_rate": 2.5889568829496447e-05,
"loss": 2.953,
"step": 11820
},
{
"epoch": 2.55,
"learning_rate": 2.5854629141897708e-05,
"loss": 2.96,
"step": 11830
},
{
"epoch": 2.56,
"learning_rate": 2.5819687782960052e-05,
"loss": 2.9756,
"step": 11840
},
{
"epoch": 2.56,
"learning_rate": 2.578474482101586e-05,
"loss": 2.9756,
"step": 11850
},
{
"epoch": 2.56,
"learning_rate": 2.5753294841174762e-05,
"loss": 2.9842,
"step": 11860
},
{
"epoch": 2.56,
"learning_rate": 2.5721843668068796e-05,
"loss": 2.9407,
"step": 11870
},
{
"epoch": 2.56,
"learning_rate": 2.568689658126082e-05,
"loss": 2.9448,
"step": 11880
},
{
"epoch": 2.57,
"learning_rate": 2.5651948151136863e-05,
"loss": 2.9692,
"step": 11890
},
{
"epoch": 2.57,
"learning_rate": 2.561699844604314e-05,
"loss": 2.9535,
"step": 11900
},
{
"epoch": 2.57,
"learning_rate": 2.558204753432836e-05,
"loss": 3.0087,
"step": 11910
},
{
"epoch": 2.57,
"learning_rate": 2.55470954843436e-05,
"loss": 2.9308,
"step": 11920
},
{
"epoch": 2.58,
"learning_rate": 2.551214236444216e-05,
"loss": 2.9622,
"step": 11930
},
{
"epoch": 2.58,
"learning_rate": 2.5480683698247793e-05,
"loss": 2.9453,
"step": 11940
},
{
"epoch": 2.58,
"learning_rate": 2.5445728733825374e-05,
"loss": 2.9455,
"step": 11950
},
{
"epoch": 2.58,
"learning_rate": 2.54107728977222e-05,
"loss": 2.9724,
"step": 11960
},
{
"epoch": 2.58,
"learning_rate": 2.5375816258298973e-05,
"loss": 2.9662,
"step": 11970
},
{
"epoch": 2.59,
"learning_rate": 2.534085888391796e-05,
"loss": 3.0116,
"step": 11980
},
{
"epoch": 2.59,
"learning_rate": 2.530590084294287e-05,
"loss": 2.9691,
"step": 11990
},
{
"epoch": 2.59,
"learning_rate": 2.5270942203738736e-05,
"loss": 3.0044,
"step": 12000
},
{
"epoch": 2.59,
"learning_rate": 2.523598303467173e-05,
"loss": 2.9416,
"step": 12010
},
{
"epoch": 2.59,
"learning_rate": 2.520102340410907e-05,
"loss": 2.9581,
"step": 12020
},
{
"epoch": 2.6,
"learning_rate": 2.5166063380418887e-05,
"loss": 2.954,
"step": 12030
},
{
"epoch": 2.6,
"learning_rate": 2.513110303197008e-05,
"loss": 2.906,
"step": 12040
},
{
"epoch": 2.6,
"learning_rate": 2.509614242713216e-05,
"loss": 2.996,
"step": 12050
},
{
"epoch": 2.6,
"learning_rate": 2.5061181634275165e-05,
"loss": 2.9993,
"step": 12060
},
{
"epoch": 2.61,
"learning_rate": 2.50262207217695e-05,
"loss": 2.9823,
"step": 12070
},
{
"epoch": 2.61,
"learning_rate": 2.4991259757985783e-05,
"loss": 2.9785,
"step": 12080
},
{
"epoch": 2.61,
"learning_rate": 2.4956298811294755e-05,
"loss": 2.9609,
"step": 12090
},
{
"epoch": 2.61,
"learning_rate": 2.4921337950067105e-05,
"loss": 2.9602,
"step": 12100
},
{
"epoch": 2.61,
"learning_rate": 2.4886377242673374e-05,
"loss": 2.9535,
"step": 12110
},
{
"epoch": 2.62,
"learning_rate": 2.485141675748378e-05,
"loss": 2.9153,
"step": 12120
},
{
"epoch": 2.62,
"learning_rate": 2.481645656286812e-05,
"loss": 2.984,
"step": 12130
},
{
"epoch": 2.62,
"learning_rate": 2.4781496727195633e-05,
"loss": 2.9924,
"step": 12140
},
{
"epoch": 2.62,
"learning_rate": 2.474653731883484e-05,
"loss": 2.9966,
"step": 12150
},
{
"epoch": 2.63,
"learning_rate": 2.4711578406153425e-05,
"loss": 2.9839,
"step": 12160
},
{
"epoch": 2.63,
"learning_rate": 2.4676620057518113e-05,
"loss": 2.9484,
"step": 12170
},
{
"epoch": 2.63,
"learning_rate": 2.4641662341294515e-05,
"loss": 2.957,
"step": 12180
},
{
"epoch": 2.63,
"learning_rate": 2.460670532584702e-05,
"loss": 2.914,
"step": 12190
},
{
"epoch": 2.63,
"learning_rate": 2.4571749079538628e-05,
"loss": 3.035,
"step": 12200
},
{
"epoch": 2.64,
"learning_rate": 2.453679367073085e-05,
"loss": 2.9753,
"step": 12210
},
{
"epoch": 2.64,
"learning_rate": 2.4501839167783552e-05,
"loss": 2.9676,
"step": 12220
},
{
"epoch": 2.64,
"learning_rate": 2.4466885639054836e-05,
"loss": 2.9563,
"step": 12230
},
{
"epoch": 2.64,
"learning_rate": 2.4438923563444667e-05,
"loss": 2.9472,
"step": 12240
},
{
"epoch": 2.64,
"learning_rate": 2.4403971960565624e-05,
"loss": 3.0442,
"step": 12250
},
{
"epoch": 2.65,
"learning_rate": 2.4372516512623623e-05,
"loss": 2.9685,
"step": 12260
},
{
"epoch": 2.65,
"learning_rate": 2.4337567182844322e-05,
"loss": 2.9619,
"step": 12270
},
{
"epoch": 2.65,
"learning_rate": 2.430261914853893e-05,
"loss": 2.974,
"step": 12280
},
{
"epoch": 2.65,
"learning_rate": 2.4267672478052878e-05,
"loss": 2.9747,
"step": 12290
},
{
"epoch": 2.66,
"learning_rate": 2.4232727239728952e-05,
"loss": 2.9586,
"step": 12300
},
{
"epoch": 2.66,
"learning_rate": 2.4197783501907127e-05,
"loss": 2.9688,
"step": 12310
},
{
"epoch": 2.66,
"learning_rate": 2.416284133292446e-05,
"loss": 2.957,
"step": 12320
},
{
"epoch": 2.66,
"learning_rate": 2.4127900801114915e-05,
"loss": 2.9586,
"step": 12330
},
{
"epoch": 2.66,
"learning_rate": 2.4092961974809264e-05,
"loss": 2.9809,
"step": 12340
},
{
"epoch": 2.67,
"learning_rate": 2.4058024922334954e-05,
"loss": 2.9767,
"step": 12350
},
{
"epoch": 2.67,
"learning_rate": 2.4023089712015947e-05,
"loss": 2.9978,
"step": 12360
},
{
"epoch": 2.67,
"learning_rate": 2.3988156412172613e-05,
"loss": 2.9979,
"step": 12370
},
{
"epoch": 2.67,
"learning_rate": 2.3953225091121587e-05,
"loss": 3.0057,
"step": 12380
},
{
"epoch": 2.67,
"learning_rate": 2.3921788650503654e-05,
"loss": 2.984,
"step": 12390
},
{
"epoch": 2.68,
"learning_rate": 2.3886861277356354e-05,
"loss": 2.9516,
"step": 12400
},
{
"epoch": 2.68,
"learning_rate": 2.3851936081097224e-05,
"loss": 2.9965,
"step": 12410
},
{
"epoch": 2.68,
"learning_rate": 2.381701313002705e-05,
"loss": 2.9704,
"step": 12420
},
{
"epoch": 2.68,
"learning_rate": 2.3782092492442223e-05,
"loss": 3.0131,
"step": 12430
},
{
"epoch": 2.69,
"learning_rate": 2.374717423663461e-05,
"loss": 2.9642,
"step": 12440
},
{
"epoch": 2.69,
"learning_rate": 2.3712258430891425e-05,
"loss": 2.955,
"step": 12450
},
{
"epoch": 2.69,
"learning_rate": 2.3680836356963154e-05,
"loss": 2.9722,
"step": 12460
},
{
"epoch": 2.69,
"learning_rate": 2.364592539445637e-05,
"loss": 2.9271,
"step": 12470
},
{
"epoch": 2.69,
"learning_rate": 2.3611017080019346e-05,
"loss": 2.9518,
"step": 12480
},
{
"epoch": 2.7,
"learning_rate": 2.357611148191986e-05,
"loss": 2.9973,
"step": 12490
},
{
"epoch": 2.7,
"learning_rate": 2.3541208668420353e-05,
"loss": 2.9441,
"step": 12500
},
{
"epoch": 2.7,
"learning_rate": 2.3506308707777848e-05,
"loss": 2.9184,
"step": 12510
},
{
"epoch": 2.7,
"learning_rate": 2.347141166824377e-05,
"loss": 2.979,
"step": 12520
},
{
"epoch": 2.71,
"learning_rate": 2.3436517618063843e-05,
"loss": 2.9415,
"step": 12530
},
{
"epoch": 2.71,
"learning_rate": 2.3401626625477935e-05,
"loss": 2.9878,
"step": 12540
},
{
"epoch": 2.71,
"learning_rate": 2.337022740278894e-05,
"loss": 2.9839,
"step": 12550
},
{
"epoch": 2.71,
"learning_rate": 2.3335342407611002e-05,
"loss": 2.986,
"step": 12560
},
{
"epoch": 2.71,
"learning_rate": 2.3300460667888425e-05,
"loss": 2.9736,
"step": 12570
},
{
"epoch": 2.72,
"learning_rate": 2.3265582251837007e-05,
"loss": 3.0035,
"step": 12580
},
{
"epoch": 2.72,
"learning_rate": 2.3230707227666044e-05,
"loss": 2.9714,
"step": 12590
},
{
"epoch": 2.72,
"learning_rate": 2.319583566357821e-05,
"loss": 2.9339,
"step": 12600
},
{
"epoch": 2.72,
"learning_rate": 2.316096762776939e-05,
"loss": 2.9336,
"step": 12610
},
{
"epoch": 2.72,
"learning_rate": 2.3126103188428595e-05,
"loss": 2.9802,
"step": 12620
},
{
"epoch": 2.73,
"learning_rate": 2.309124241373778e-05,
"loss": 2.9626,
"step": 12630
},
{
"epoch": 2.73,
"learning_rate": 2.305638537187175e-05,
"loss": 2.9821,
"step": 12640
},
{
"epoch": 2.73,
"learning_rate": 2.3021532130998003e-05,
"loss": 2.9875,
"step": 12650
},
{
"epoch": 2.73,
"learning_rate": 2.2986682759276602e-05,
"loss": 2.8992,
"step": 12660
},
{
"epoch": 2.74,
"learning_rate": 2.295183732486005e-05,
"loss": 2.914,
"step": 12670
},
{
"epoch": 2.74,
"learning_rate": 2.2916995895893142e-05,
"loss": 2.9702,
"step": 12680
},
{
"epoch": 2.74,
"learning_rate": 2.2882158540512843e-05,
"loss": 2.9716,
"step": 12690
},
{
"epoch": 2.74,
"learning_rate": 2.284732532684815e-05,
"loss": 2.9335,
"step": 12700
},
{
"epoch": 2.74,
"learning_rate": 2.281249632301997e-05,
"loss": 2.9802,
"step": 12710
},
{
"epoch": 2.75,
"learning_rate": 2.277767159714096e-05,
"loss": 2.9859,
"step": 12720
},
{
"epoch": 2.75,
"learning_rate": 2.2742851217315425e-05,
"loss": 2.9603,
"step": 12730
},
{
"epoch": 2.75,
"learning_rate": 2.2708035251639166e-05,
"loss": 2.9651,
"step": 12740
},
{
"epoch": 2.75,
"learning_rate": 2.2676704712902387e-05,
"loss": 2.9798,
"step": 12750
},
{
"epoch": 2.75,
"learning_rate": 2.2641897321682655e-05,
"loss": 2.9575,
"step": 12760
},
{
"epoch": 2.76,
"learning_rate": 2.2607094542040733e-05,
"loss": 2.9081,
"step": 12770
},
{
"epoch": 2.76,
"learning_rate": 2.2572296442037997e-05,
"loss": 2.9723,
"step": 12780
},
{
"epoch": 2.76,
"learning_rate": 2.2537503089726675e-05,
"loss": 2.9559,
"step": 12790
},
{
"epoch": 2.76,
"learning_rate": 2.250271455314972e-05,
"loss": 2.9917,
"step": 12800
},
{
"epoch": 2.77,
"learning_rate": 2.246793090034064e-05,
"loss": 2.9842,
"step": 12810
},
{
"epoch": 2.77,
"learning_rate": 2.2433152199323437e-05,
"loss": 2.973,
"step": 12820
},
{
"epoch": 2.77,
"learning_rate": 2.2398378518112394e-05,
"loss": 2.9844,
"step": 12830
},
{
"epoch": 2.77,
"learning_rate": 2.236360992471198e-05,
"loss": 2.9732,
"step": 12840
},
{
"epoch": 2.77,
"learning_rate": 2.2328846487116726e-05,
"loss": 2.9647,
"step": 12850
},
{
"epoch": 2.78,
"learning_rate": 2.2294088273311072e-05,
"loss": 2.9826,
"step": 12860
},
{
"epoch": 2.78,
"learning_rate": 2.2266285509074092e-05,
"loss": 2.9998,
"step": 12870
},
{
"epoch": 2.78,
"learning_rate": 2.223153686937783e-05,
"loss": 2.9632,
"step": 12880
},
{
"epoch": 2.78,
"learning_rate": 2.219679364377284e-05,
"loss": 2.9326,
"step": 12890
},
{
"epoch": 2.78,
"learning_rate": 2.216205590020404e-05,
"loss": 2.9847,
"step": 12900
},
{
"epoch": 2.79,
"learning_rate": 2.212732370660562e-05,
"loss": 3.0105,
"step": 12910
},
{
"epoch": 2.79,
"learning_rate": 2.2092597130900923e-05,
"loss": 2.9841,
"step": 12920
},
{
"epoch": 2.79,
"learning_rate": 2.2057876241002302e-05,
"loss": 3.0224,
"step": 12930
},
{
"epoch": 2.79,
"learning_rate": 2.2026632357578348e-05,
"loss": 2.9372,
"step": 12940
},
{
"epoch": 2.8,
"learning_rate": 2.1991922457769885e-05,
"loss": 2.9802,
"step": 12950
},
{
"epoch": 2.8,
"learning_rate": 2.1957218440649964e-05,
"loss": 2.9689,
"step": 12960
},
{
"epoch": 2.8,
"learning_rate": 2.192252037408684e-05,
"loss": 2.9609,
"step": 12970
},
{
"epoch": 2.8,
"learning_rate": 2.188782832593711e-05,
"loss": 2.8893,
"step": 12980
},
{
"epoch": 2.8,
"learning_rate": 2.18531423640456e-05,
"loss": 2.9401,
"step": 12990
},
{
"epoch": 2.81,
"learning_rate": 2.1818462556245246e-05,
"loss": 2.9864,
"step": 13000
},
{
"epoch": 2.81,
"learning_rate": 2.178378897035694e-05,
"loss": 2.9502,
"step": 13010
},
{
"epoch": 2.81,
"learning_rate": 2.1749121674189405e-05,
"loss": 2.9565,
"step": 13020
},
{
"epoch": 2.81,
"learning_rate": 2.171446073553907e-05,
"loss": 2.9495,
"step": 13030
},
{
"epoch": 2.82,
"learning_rate": 2.1679806222189924e-05,
"loss": 2.99,
"step": 13040
},
{
"epoch": 2.82,
"learning_rate": 2.1645158201913402e-05,
"loss": 2.9752,
"step": 13050
},
{
"epoch": 2.82,
"learning_rate": 2.1610516742468227e-05,
"loss": 2.9661,
"step": 13060
},
{
"epoch": 2.82,
"learning_rate": 2.15758819116003e-05,
"loss": 2.9298,
"step": 13070
},
{
"epoch": 2.82,
"learning_rate": 2.1541253777042552e-05,
"loss": 2.9756,
"step": 13080
},
{
"epoch": 2.83,
"learning_rate": 2.150663240651483e-05,
"loss": 2.9604,
"step": 13090
},
{
"epoch": 2.83,
"learning_rate": 2.147547901224534e-05,
"loss": 2.9754,
"step": 13100
},
{
"epoch": 2.83,
"learning_rate": 2.144087067989534e-05,
"loss": 2.9765,
"step": 13110
},
{
"epoch": 2.83,
"learning_rate": 2.140626930788762e-05,
"loss": 2.9741,
"step": 13120
},
{
"epoch": 2.83,
"learning_rate": 2.1375134080100807e-05,
"loss": 2.9661,
"step": 13130
},
{
"epoch": 2.84,
"learning_rate": 2.1340546119156034e-05,
"loss": 2.9765,
"step": 13140
},
{
"epoch": 2.84,
"learning_rate": 2.1305965314751338e-05,
"loss": 2.9597,
"step": 13150
},
{
"epoch": 2.84,
"learning_rate": 2.1271391734513978e-05,
"loss": 2.9504,
"step": 13160
},
{
"epoch": 2.84,
"learning_rate": 2.1236825446057106e-05,
"loss": 2.9579,
"step": 13170
},
{
"epoch": 2.85,
"learning_rate": 2.1202266516979636e-05,
"loss": 2.9713,
"step": 13180
},
{
"epoch": 2.85,
"learning_rate": 2.1167715014866057e-05,
"loss": 2.9515,
"step": 13190
},
{
"epoch": 2.85,
"learning_rate": 2.113317100728634e-05,
"loss": 2.9609,
"step": 13200
},
{
"epoch": 2.85,
"learning_rate": 2.109863456179581e-05,
"loss": 2.9729,
"step": 13210
},
{
"epoch": 2.85,
"learning_rate": 2.1064105745934992e-05,
"loss": 2.967,
"step": 13220
},
{
"epoch": 2.86,
"learning_rate": 2.10295846272295e-05,
"loss": 2.9648,
"step": 13230
},
{
"epoch": 2.86,
"learning_rate": 2.0995071273189886e-05,
"loss": 2.9636,
"step": 13240
},
{
"epoch": 2.86,
"learning_rate": 2.0964015949128805e-05,
"loss": 2.9774,
"step": 13250
},
{
"epoch": 2.86,
"learning_rate": 2.092951753389146e-05,
"loss": 2.9446,
"step": 13260
},
{
"epoch": 2.86,
"learning_rate": 2.089502707901427e-05,
"loss": 2.9445,
"step": 13270
},
{
"epoch": 2.87,
"learning_rate": 2.0863992531480957e-05,
"loss": 2.9544,
"step": 13280
},
{
"epoch": 2.87,
"learning_rate": 2.0829517387101454e-05,
"loss": 2.9651,
"step": 13290
},
{
"epoch": 2.87,
"learning_rate": 2.0795050398645455e-05,
"loss": 2.9269,
"step": 13300
},
{
"epoch": 2.87,
"learning_rate": 2.0760591633517657e-05,
"loss": 2.9685,
"step": 13310
},
{
"epoch": 2.88,
"learning_rate": 2.0726141159106672e-05,
"loss": 2.9664,
"step": 13320
},
{
"epoch": 2.88,
"learning_rate": 2.069169904278491e-05,
"loss": 2.9594,
"step": 13330
},
{
"epoch": 2.88,
"learning_rate": 2.065726535190842e-05,
"loss": 2.9107,
"step": 13340
},
{
"epoch": 2.88,
"learning_rate": 2.062284015381679e-05,
"loss": 3.0168,
"step": 13350
},
{
"epoch": 2.88,
"learning_rate": 2.0588423515832982e-05,
"loss": 2.9776,
"step": 13360
},
{
"epoch": 2.89,
"learning_rate": 2.0557455916168696e-05,
"loss": 2.9962,
"step": 13370
},
{
"epoch": 2.89,
"learning_rate": 2.052305572780449e-05,
"loss": 2.9517,
"step": 13380
},
{
"epoch": 2.89,
"learning_rate": 2.048866429468959e-05,
"loss": 2.9529,
"step": 13390
},
{
"epoch": 2.89,
"learning_rate": 2.0454281684080935e-05,
"loss": 2.9453,
"step": 13400
},
{
"epoch": 2.9,
"learning_rate": 2.0419907963218214e-05,
"loss": 2.9512,
"step": 13410
},
{
"epoch": 2.9,
"learning_rate": 2.0385543199323726e-05,
"loss": 2.9261,
"step": 13420
},
{
"epoch": 2.9,
"learning_rate": 2.035118745960226e-05,
"loss": 2.9249,
"step": 13430
},
{
"epoch": 2.9,
"learning_rate": 2.0316840811240944e-05,
"loss": 2.9737,
"step": 13440
},
{
"epoch": 2.9,
"learning_rate": 2.0282503321409148e-05,
"loss": 2.9823,
"step": 13450
},
{
"epoch": 2.91,
"learning_rate": 2.0248175057258304e-05,
"loss": 2.9347,
"step": 13460
},
{
"epoch": 2.91,
"learning_rate": 2.0213856085921822e-05,
"loss": 2.9832,
"step": 13470
},
{
"epoch": 2.91,
"learning_rate": 2.017954647451493e-05,
"loss": 2.9528,
"step": 13480
},
{
"epoch": 2.91,
"learning_rate": 2.014524629013457e-05,
"loss": 2.9573,
"step": 13490
},
{
"epoch": 2.91,
"learning_rate": 2.0110955599859217e-05,
"loss": 2.9518,
"step": 13500
},
{
"epoch": 2.92,
"learning_rate": 2.0076674470748802e-05,
"loss": 2.9617,
"step": 13510
},
{
"epoch": 2.92,
"learning_rate": 2.0042402969844548e-05,
"loss": 2.9609,
"step": 13520
},
{
"epoch": 2.92,
"learning_rate": 2.0008141164168854e-05,
"loss": 2.9184,
"step": 13530
},
{
"epoch": 2.92,
"learning_rate": 1.9973889120725155e-05,
"loss": 2.9549,
"step": 13540
},
{
"epoch": 2.93,
"learning_rate": 1.993964690649779e-05,
"loss": 2.9534,
"step": 13550
},
{
"epoch": 2.93,
"learning_rate": 1.9905414588451894e-05,
"loss": 2.9696,
"step": 13560
},
{
"epoch": 2.93,
"learning_rate": 1.987119223353323e-05,
"loss": 2.9607,
"step": 13570
},
{
"epoch": 2.93,
"learning_rate": 1.9840400687895193e-05,
"loss": 2.9811,
"step": 13580
},
{
"epoch": 2.93,
"learning_rate": 1.9809617315376432e-05,
"loss": 2.9458,
"step": 13590
},
{
"epoch": 2.94,
"learning_rate": 1.977542321319916e-05,
"loss": 2.9351,
"step": 13600
},
{
"epoch": 2.94,
"learning_rate": 1.974123932836423e-05,
"loss": 2.9564,
"step": 13610
},
{
"epoch": 2.94,
"learning_rate": 1.97070657277227e-05,
"loss": 2.9504,
"step": 13620
},
{
"epoch": 2.94,
"learning_rate": 1.9672902478105502e-05,
"loss": 2.9776,
"step": 13630
},
{
"epoch": 2.94,
"learning_rate": 1.9638749646323343e-05,
"loss": 2.9859,
"step": 13640
},
{
"epoch": 2.95,
"learning_rate": 1.9604607299166543e-05,
"loss": 2.9401,
"step": 13650
},
{
"epoch": 2.95,
"learning_rate": 1.9570475503404918e-05,
"loss": 2.9626,
"step": 13660
},
{
"epoch": 2.95,
"learning_rate": 1.9536354325787658e-05,
"loss": 2.9497,
"step": 13670
},
{
"epoch": 2.95,
"learning_rate": 1.9502243833043177e-05,
"loss": 2.9523,
"step": 13680
},
{
"epoch": 2.96,
"learning_rate": 1.9468144091878997e-05,
"loss": 2.9389,
"step": 13690
},
{
"epoch": 2.96,
"learning_rate": 1.9434055168981627e-05,
"loss": 2.9982,
"step": 13700
},
{
"epoch": 2.96,
"learning_rate": 1.9399977131016404e-05,
"loss": 2.9605,
"step": 13710
},
{
"epoch": 2.96,
"learning_rate": 1.9369316258546448e-05,
"loss": 2.9557,
"step": 13720
},
{
"epoch": 2.96,
"learning_rate": 1.933525908553897e-05,
"loss": 2.9658,
"step": 13730
},
{
"epoch": 2.97,
"learning_rate": 1.930121299067229e-05,
"loss": 2.923,
"step": 13740
},
{
"epoch": 2.97,
"learning_rate": 1.9267178040527986e-05,
"loss": 2.9552,
"step": 13750
},
{
"epoch": 2.97,
"learning_rate": 1.9233154301665853e-05,
"loss": 2.9549,
"step": 13760
},
{
"epoch": 2.97,
"learning_rate": 1.919914184062375e-05,
"loss": 2.9232,
"step": 13770
},
{
"epoch": 2.97,
"learning_rate": 1.9168540323197758e-05,
"loss": 3.0205,
"step": 13780
},
{
"epoch": 2.98,
"learning_rate": 1.9134549473246447e-05,
"loss": 2.9848,
"step": 13790
},
{
"epoch": 2.98,
"learning_rate": 1.9100570093949787e-05,
"loss": 2.9582,
"step": 13800
},
{
"epoch": 2.98,
"learning_rate": 1.90666022517589e-05,
"loss": 2.9316,
"step": 13810
},
{
"epoch": 2.98,
"learning_rate": 1.9032646013102336e-05,
"loss": 3.0137,
"step": 13820
},
{
"epoch": 2.99,
"learning_rate": 1.8998701444385954e-05,
"loss": 2.955,
"step": 13830
},
{
"epoch": 2.99,
"learning_rate": 1.896476861199279e-05,
"loss": 2.9297,
"step": 13840
},
{
"epoch": 2.99,
"learning_rate": 1.893084758228294e-05,
"loss": 2.9196,
"step": 13850
},
{
"epoch": 2.99,
"learning_rate": 1.8896938421593396e-05,
"loss": 2.9727,
"step": 13860
},
{
"epoch": 2.99,
"learning_rate": 1.8863041196237958e-05,
"loss": 2.9468,
"step": 13870
},
{
"epoch": 3.0,
"learning_rate": 1.8829155972507075e-05,
"loss": 2.9886,
"step": 13880
},
{
"epoch": 3.0,
"learning_rate": 1.8798669587308416e-05,
"loss": 2.9291,
"step": 13890
},
{
"epoch": 3.0,
"eval_loss": 2.950490713119507,
"eval_runtime": 267.3971,
"eval_samples_per_second": 554.307,
"eval_steps_per_second": 17.323,
"step": 13896
},
{
"epoch": 3.0,
"learning_rate": 1.8764807349210213e-05,
"loss": 2.9599,
"step": 13900
},
{
"epoch": 3.0,
"learning_rate": 1.873434175867729e-05,
"loss": 2.9698,
"step": 13910
},
{
"epoch": 3.01,
"learning_rate": 1.8700502745273205e-05,
"loss": 2.9227,
"step": 13920
},
{
"epoch": 3.01,
"learning_rate": 1.8666676051358846e-05,
"loss": 2.9592,
"step": 13930
},
{
"epoch": 3.01,
"learning_rate": 1.8632861743086737e-05,
"loss": 2.9496,
"step": 13940
},
{
"epoch": 3.01,
"learning_rate": 1.859905988658517e-05,
"loss": 2.9259,
"step": 13950
},
{
"epoch": 3.01,
"learning_rate": 1.856527054795809e-05,
"loss": 2.9633,
"step": 13960
},
{
"epoch": 3.02,
"learning_rate": 1.8531493793284964e-05,
"loss": 2.971,
"step": 13970
},
{
"epoch": 3.02,
"learning_rate": 1.849772968862065e-05,
"loss": 2.9677,
"step": 13980
},
{
"epoch": 3.02,
"learning_rate": 1.846397829999526e-05,
"loss": 2.9595,
"step": 13990
},
{
"epoch": 3.02,
"learning_rate": 1.8430239693414048e-05,
"loss": 2.933,
"step": 14000
},
{
"epoch": 3.02,
"learning_rate": 1.8396513934857258e-05,
"loss": 2.9694,
"step": 14010
},
{
"epoch": 3.03,
"learning_rate": 1.836280109028003e-05,
"loss": 3.0011,
"step": 14020
},
{
"epoch": 3.03,
"learning_rate": 1.8329101225612223e-05,
"loss": 2.9621,
"step": 14030
},
{
"epoch": 3.03,
"learning_rate": 1.8295414406758326e-05,
"loss": 2.932,
"step": 14040
},
{
"epoch": 3.03,
"learning_rate": 1.826174069959732e-05,
"loss": 2.9339,
"step": 14050
},
{
"epoch": 3.04,
"learning_rate": 1.8228080169982526e-05,
"loss": 2.9872,
"step": 14060
},
{
"epoch": 3.04,
"learning_rate": 1.819443288374152e-05,
"loss": 2.9533,
"step": 14070
},
{
"epoch": 3.04,
"learning_rate": 1.816752463419743e-05,
"loss": 2.9662,
"step": 14080
},
{
"epoch": 3.04,
"learning_rate": 1.8133901351831935e-05,
"loss": 2.9518,
"step": 14090
},
{
"epoch": 3.04,
"learning_rate": 1.8100291497019203e-05,
"loss": 2.9756,
"step": 14100
},
{
"epoch": 3.05,
"learning_rate": 1.8066695135487688e-05,
"loss": 2.9293,
"step": 14110
},
{
"epoch": 3.05,
"learning_rate": 1.8033112332939478e-05,
"loss": 2.8975,
"step": 14120
},
{
"epoch": 3.05,
"learning_rate": 1.799954315505012e-05,
"loss": 2.9292,
"step": 14130
},
{
"epoch": 3.05,
"learning_rate": 1.796598766746853e-05,
"loss": 2.9415,
"step": 14140
},
{
"epoch": 3.05,
"learning_rate": 1.7932445935816848e-05,
"loss": 2.927,
"step": 14150
},
{
"epoch": 3.06,
"learning_rate": 1.7898918025690308e-05,
"loss": 2.9229,
"step": 14160
},
{
"epoch": 3.06,
"learning_rate": 1.7868754778173104e-05,
"loss": 2.977,
"step": 14170
},
{
"epoch": 3.06,
"learning_rate": 1.7835253309562305e-05,
"loss": 2.9229,
"step": 14180
},
{
"epoch": 3.06,
"learning_rate": 1.7801765852549524e-05,
"loss": 2.9442,
"step": 14190
},
{
"epoch": 3.07,
"learning_rate": 1.7768292472623858e-05,
"loss": 2.9566,
"step": 14200
},
{
"epoch": 3.07,
"learning_rate": 1.7734833235246866e-05,
"loss": 2.9526,
"step": 14210
},
{
"epoch": 3.07,
"learning_rate": 1.7701388205852476e-05,
"loss": 2.9593,
"step": 14220
},
{
"epoch": 3.07,
"learning_rate": 1.7667957449846794e-05,
"loss": 2.9361,
"step": 14230
},
{
"epoch": 3.07,
"learning_rate": 1.7634541032608042e-05,
"loss": 2.9654,
"step": 14240
},
{
"epoch": 3.08,
"learning_rate": 1.7601139019486386e-05,
"loss": 2.959,
"step": 14250
},
{
"epoch": 3.08,
"learning_rate": 1.7567751475803818e-05,
"loss": 2.9235,
"step": 14260
},
{
"epoch": 3.08,
"learning_rate": 1.753437846685405e-05,
"loss": 2.9827,
"step": 14270
},
{
"epoch": 3.08,
"learning_rate": 1.7501020057902358e-05,
"loss": 2.9554,
"step": 14280
},
{
"epoch": 3.09,
"learning_rate": 1.7467676314185462e-05,
"loss": 2.9388,
"step": 14290
},
{
"epoch": 3.09,
"learning_rate": 1.7434347300911414e-05,
"loss": 2.9282,
"step": 14300
},
{
"epoch": 3.09,
"learning_rate": 1.740103308325945e-05,
"loss": 2.9948,
"step": 14310
},
{
"epoch": 3.09,
"learning_rate": 1.736773372637988e-05,
"loss": 2.9318,
"step": 14320
},
{
"epoch": 3.09,
"learning_rate": 1.733444929539394e-05,
"loss": 2.9628,
"step": 14330
},
{
"epoch": 3.1,
"learning_rate": 1.7301179855393683e-05,
"loss": 2.9688,
"step": 14340
},
{
"epoch": 3.1,
"learning_rate": 1.7267925471441848e-05,
"loss": 3.0025,
"step": 14350
},
{
"epoch": 3.1,
"learning_rate": 1.7234686208571727e-05,
"loss": 2.9524,
"step": 14360
},
{
"epoch": 3.1,
"learning_rate": 1.720146213178703e-05,
"loss": 2.94,
"step": 14370
},
{
"epoch": 3.1,
"learning_rate": 1.7171573500485493e-05,
"loss": 2.9358,
"step": 14380
},
{
"epoch": 3.11,
"learning_rate": 1.7138378456241898e-05,
"loss": 2.9222,
"step": 14390
},
{
"epoch": 3.11,
"learning_rate": 1.710519878642613e-05,
"loss": 2.9345,
"step": 14400
},
{
"epoch": 3.11,
"learning_rate": 1.7075350282357605e-05,
"loss": 2.9327,
"step": 14410
},
{
"epoch": 3.11,
"learning_rate": 1.7042200002693908e-05,
"loss": 2.936,
"step": 14420
},
{
"epoch": 3.12,
"learning_rate": 1.7009065285547577e-05,
"loss": 2.9037,
"step": 14430
},
{
"epoch": 3.12,
"learning_rate": 1.6975946195717856e-05,
"loss": 2.9838,
"step": 14440
},
{
"epoch": 3.12,
"learning_rate": 1.6942842797973464e-05,
"loss": 2.9122,
"step": 14450
},
{
"epoch": 3.12,
"learning_rate": 1.690975515705241e-05,
"loss": 2.9558,
"step": 14460
},
{
"epoch": 3.12,
"learning_rate": 1.6876683337661903e-05,
"loss": 2.8968,
"step": 14470
},
{
"epoch": 3.13,
"learning_rate": 1.6843627404478205e-05,
"loss": 2.9247,
"step": 14480
},
{
"epoch": 3.13,
"learning_rate": 1.681058742214651e-05,
"loss": 2.9971,
"step": 14490
},
{
"epoch": 3.13,
"learning_rate": 1.677756345528082e-05,
"loss": 2.9689,
"step": 14500
},
{
"epoch": 3.13,
"learning_rate": 1.6744555568463817e-05,
"loss": 2.9537,
"step": 14510
},
{
"epoch": 3.13,
"learning_rate": 1.6711563826246728e-05,
"loss": 2.9995,
"step": 14520
},
{
"epoch": 3.14,
"learning_rate": 1.6681885115210432e-05,
"loss": 2.9232,
"step": 14530
},
{
"epoch": 3.14,
"learning_rate": 1.6648924225458668e-05,
"loss": 2.9527,
"step": 14540
},
{
"epoch": 3.14,
"learning_rate": 1.661927338638032e-05,
"loss": 2.9355,
"step": 14550
},
{
"epoch": 3.14,
"learning_rate": 1.6586343581791934e-05,
"loss": 2.9694,
"step": 14560
},
{
"epoch": 3.15,
"learning_rate": 1.6553430231207677e-05,
"loss": 2.9544,
"step": 14570
},
{
"epoch": 3.15,
"learning_rate": 1.65205333989939e-05,
"loss": 2.9396,
"step": 14580
},
{
"epoch": 3.15,
"learning_rate": 1.6487653149484655e-05,
"loss": 2.9752,
"step": 14590
},
{
"epoch": 3.15,
"learning_rate": 1.645478954698157e-05,
"loss": 2.9217,
"step": 14600
},
{
"epoch": 3.15,
"learning_rate": 1.6421942655753708e-05,
"loss": 2.9358,
"step": 14610
},
{
"epoch": 3.16,
"learning_rate": 1.6389112540037448e-05,
"loss": 2.9877,
"step": 14620
},
{
"epoch": 3.16,
"learning_rate": 1.6356299264036384e-05,
"loss": 2.9351,
"step": 14630
},
{
"epoch": 3.16,
"learning_rate": 1.632350289192116e-05,
"loss": 2.9321,
"step": 14640
},
{
"epoch": 3.16,
"learning_rate": 1.6290723487829364e-05,
"loss": 2.9697,
"step": 14650
},
{
"epoch": 3.16,
"learning_rate": 1.62579611158654e-05,
"loss": 2.9417,
"step": 14660
},
{
"epoch": 3.17,
"learning_rate": 1.6225215840100365e-05,
"loss": 2.982,
"step": 14670
},
{
"epoch": 3.17,
"learning_rate": 1.619248772457193e-05,
"loss": 2.9768,
"step": 14680
},
{
"epoch": 3.17,
"learning_rate": 1.615977683328419e-05,
"loss": 2.921,
"step": 14690
},
{
"epoch": 3.17,
"learning_rate": 1.6127083230207574e-05,
"loss": 2.9497,
"step": 14700
},
{
"epoch": 3.18,
"learning_rate": 1.609440697927869e-05,
"loss": 2.9947,
"step": 14710
},
{
"epoch": 3.18,
"learning_rate": 1.606174814440021e-05,
"loss": 2.9278,
"step": 14720
},
{
"epoch": 3.18,
"learning_rate": 1.602910678944076e-05,
"loss": 2.9758,
"step": 14730
},
{
"epoch": 3.18,
"learning_rate": 1.599648297823477e-05,
"loss": 2.9139,
"step": 14740
},
{
"epoch": 3.18,
"learning_rate": 1.5963876774582366e-05,
"loss": 2.9303,
"step": 14750
},
{
"epoch": 3.19,
"learning_rate": 1.5937804531950724e-05,
"loss": 2.9578,
"step": 14760
},
{
"epoch": 3.19,
"learning_rate": 1.590523018256109e-05,
"loss": 2.9922,
"step": 14770
},
{
"epoch": 3.19,
"learning_rate": 1.58726736191818e-05,
"loss": 2.9445,
"step": 14780
},
{
"epoch": 3.19,
"learning_rate": 1.5840134905481467e-05,
"loss": 2.9125,
"step": 14790
},
{
"epoch": 3.2,
"learning_rate": 1.58076141050938e-05,
"loss": 2.943,
"step": 14800
},
{
"epoch": 3.2,
"learning_rate": 1.5778360753192246e-05,
"loss": 2.95,
"step": 14810
},
{
"epoch": 3.2,
"learning_rate": 1.574587416328392e-05,
"loss": 2.9663,
"step": 14820
},
{
"epoch": 3.2,
"learning_rate": 1.5713405671027454e-05,
"loss": 2.9534,
"step": 14830
},
{
"epoch": 3.2,
"learning_rate": 1.5680955339919222e-05,
"loss": 2.9545,
"step": 14840
},
{
"epoch": 3.21,
"learning_rate": 1.564852323342009e-05,
"loss": 2.97,
"step": 14850
},
{
"epoch": 3.21,
"learning_rate": 1.5616109414955267e-05,
"loss": 2.9305,
"step": 14860
},
{
"epoch": 3.21,
"learning_rate": 1.558371394791422e-05,
"loss": 2.9493,
"step": 14870
},
{
"epoch": 3.21,
"learning_rate": 1.555133689565051e-05,
"loss": 2.9414,
"step": 14880
},
{
"epoch": 3.21,
"learning_rate": 1.5518978321481694e-05,
"loss": 2.9441,
"step": 14890
},
{
"epoch": 3.22,
"learning_rate": 1.5486638288689194e-05,
"loss": 2.9164,
"step": 14900
},
{
"epoch": 3.22,
"learning_rate": 1.5454316860518158e-05,
"loss": 2.935,
"step": 14910
},
{
"epoch": 3.22,
"learning_rate": 1.5422014100177368e-05,
"loss": 2.9488,
"step": 14920
},
{
"epoch": 3.22,
"learning_rate": 1.5389730070839085e-05,
"loss": 2.9362,
"step": 14930
},
{
"epoch": 3.23,
"learning_rate": 1.5357464835638956e-05,
"loss": 2.9833,
"step": 14940
},
{
"epoch": 3.23,
"learning_rate": 1.5325218457675862e-05,
"loss": 2.9405,
"step": 14950
},
{
"epoch": 3.23,
"learning_rate": 1.5292991000011804e-05,
"loss": 2.9226,
"step": 14960
},
{
"epoch": 3.23,
"learning_rate": 1.526078252567178e-05,
"loss": 2.9192,
"step": 14970
},
{
"epoch": 3.23,
"learning_rate": 1.5228593097643672e-05,
"loss": 2.8906,
"step": 14980
},
{
"epoch": 3.24,
"learning_rate": 1.5199638949044533e-05,
"loss": 2.9223,
"step": 14990
},
{
"epoch": 3.24,
"learning_rate": 1.5167485882407173e-05,
"loss": 2.9354,
"step": 15000
},
{
"epoch": 3.24,
"learning_rate": 1.5135352044535506e-05,
"loss": 2.9725,
"step": 15010
},
{
"epoch": 3.24,
"learning_rate": 1.5103237498271438e-05,
"loss": 2.9002,
"step": 15020
},
{
"epoch": 3.24,
"learning_rate": 1.5071142306419173e-05,
"loss": 2.8888,
"step": 15030
},
{
"epoch": 3.25,
"learning_rate": 1.5039066531745033e-05,
"loss": 2.9215,
"step": 15040
},
{
"epoch": 3.25,
"learning_rate": 1.50070102369774e-05,
"loss": 2.9335,
"step": 15050
},
{
"epoch": 3.25,
"learning_rate": 1.4974973484806549e-05,
"loss": 2.9274,
"step": 15060
},
{
"epoch": 3.25,
"learning_rate": 1.4942956337884523e-05,
"loss": 2.9606,
"step": 15070
},
{
"epoch": 3.26,
"learning_rate": 1.4910958858825042e-05,
"loss": 2.9339,
"step": 15080
},
{
"epoch": 3.26,
"learning_rate": 1.4882177995413226e-05,
"loss": 2.9687,
"step": 15090
},
{
"epoch": 3.26,
"learning_rate": 1.4853413159247149e-05,
"loss": 2.9164,
"step": 15100
},
{
"epoch": 3.26,
"learning_rate": 1.4824664395892068e-05,
"loss": 2.9394,
"step": 15110
},
{
"epoch": 3.26,
"learning_rate": 1.4792740231713431e-05,
"loss": 2.9811,
"step": 15120
},
{
"epoch": 3.27,
"learning_rate": 1.4760836029164552e-05,
"loss": 2.9746,
"step": 15130
},
{
"epoch": 3.27,
"learning_rate": 1.4728951850638262e-05,
"loss": 2.8958,
"step": 15140
},
{
"epoch": 3.27,
"learning_rate": 1.469708775848824e-05,
"loss": 2.9661,
"step": 15150
},
{
"epoch": 3.27,
"learning_rate": 1.4665243815028889e-05,
"loss": 2.8788,
"step": 15160
},
{
"epoch": 3.28,
"learning_rate": 1.4633420082535193e-05,
"loss": 2.9395,
"step": 15170
},
{
"epoch": 3.28,
"learning_rate": 1.4601616623242617e-05,
"loss": 2.9269,
"step": 15180
},
{
"epoch": 3.28,
"learning_rate": 1.4569833499346982e-05,
"loss": 2.9426,
"step": 15190
},
{
"epoch": 3.28,
"learning_rate": 1.4538070773004336e-05,
"loss": 2.8723,
"step": 15200
},
{
"epoch": 3.28,
"learning_rate": 1.4506328506330841e-05,
"loss": 2.9177,
"step": 15210
},
{
"epoch": 3.29,
"learning_rate": 1.4474606761402646e-05,
"loss": 2.9409,
"step": 15220
},
{
"epoch": 3.29,
"learning_rate": 1.4442905600255764e-05,
"loss": 2.9809,
"step": 15230
},
{
"epoch": 3.29,
"learning_rate": 1.441122508488596e-05,
"loss": 2.9192,
"step": 15240
},
{
"epoch": 3.29,
"learning_rate": 1.4379565277248617e-05,
"loss": 2.9343,
"step": 15250
},
{
"epoch": 3.29,
"learning_rate": 1.4347926239258625e-05,
"loss": 2.9563,
"step": 15260
},
{
"epoch": 3.3,
"learning_rate": 1.4316308032790254e-05,
"loss": 2.9488,
"step": 15270
},
{
"epoch": 3.3,
"learning_rate": 1.4287869509025958e-05,
"loss": 2.921,
"step": 15280
},
{
"epoch": 3.3,
"learning_rate": 1.4256291052766292e-05,
"loss": 2.9591,
"step": 15290
},
{
"epoch": 3.3,
"learning_rate": 1.4224733607232819e-05,
"loss": 2.9777,
"step": 15300
},
{
"epoch": 3.31,
"learning_rate": 1.4196349921431662e-05,
"loss": 2.966,
"step": 15310
},
{
"epoch": 3.31,
"learning_rate": 1.416483256626793e-05,
"loss": 2.9372,
"step": 15320
},
{
"epoch": 3.31,
"learning_rate": 1.4133336400689404e-05,
"loss": 2.9311,
"step": 15330
},
{
"epoch": 3.31,
"learning_rate": 1.4101861486290971e-05,
"loss": 2.9354,
"step": 15340
},
{
"epoch": 3.31,
"learning_rate": 1.4070407884625925e-05,
"loss": 2.9642,
"step": 15350
},
{
"epoch": 3.32,
"learning_rate": 1.4038975657205893e-05,
"loss": 2.976,
"step": 15360
},
{
"epoch": 3.32,
"learning_rate": 1.4007564865500706e-05,
"loss": 2.9731,
"step": 15370
},
{
"epoch": 3.32,
"learning_rate": 1.3976175570938265e-05,
"loss": 2.9504,
"step": 15380
},
{
"epoch": 3.32,
"learning_rate": 1.3944807834904438e-05,
"loss": 2.9945,
"step": 15390
},
{
"epoch": 3.32,
"learning_rate": 1.3913461718742917e-05,
"loss": 2.9507,
"step": 15400
},
{
"epoch": 3.33,
"learning_rate": 1.3885268749854907e-05,
"loss": 2.9867,
"step": 15410
},
{
"epoch": 3.33,
"learning_rate": 1.3853963880301185e-05,
"loss": 2.9413,
"step": 15420
},
{
"epoch": 3.33,
"learning_rate": 1.3822680808277e-05,
"loss": 2.9409,
"step": 15430
},
{
"epoch": 3.33,
"learning_rate": 1.3791419594960489e-05,
"loss": 2.9393,
"step": 15440
},
{
"epoch": 3.34,
"learning_rate": 1.3760180301487025e-05,
"loss": 2.9382,
"step": 15450
},
{
"epoch": 3.34,
"learning_rate": 1.3728962988949146e-05,
"loss": 2.9735,
"step": 15460
},
{
"epoch": 3.34,
"learning_rate": 1.3697767718396374e-05,
"loss": 2.947,
"step": 15470
},
{
"epoch": 3.34,
"learning_rate": 1.3666594550835139e-05,
"loss": 2.9703,
"step": 15480
},
{
"epoch": 3.34,
"learning_rate": 1.363855764847477e-05,
"loss": 2.9471,
"step": 15490
},
{
"epoch": 3.35,
"learning_rate": 1.3607426644515248e-05,
"loss": 2.8799,
"step": 15500
},
{
"epoch": 3.35,
"learning_rate": 1.357631792022103e-05,
"loss": 2.936,
"step": 15510
},
{
"epoch": 3.35,
"learning_rate": 1.3545231536429293e-05,
"loss": 2.9656,
"step": 15520
},
{
"epoch": 3.35,
"learning_rate": 1.3514167553933516e-05,
"loss": 2.905,
"step": 15530
},
{
"epoch": 3.35,
"learning_rate": 1.348312603348338e-05,
"loss": 2.8914,
"step": 15540
},
{
"epoch": 3.36,
"learning_rate": 1.3452107035784628e-05,
"loss": 2.9121,
"step": 15550
},
{
"epoch": 3.36,
"learning_rate": 1.3421110621498956e-05,
"loss": 2.9639,
"step": 15560
},
{
"epoch": 3.36,
"learning_rate": 1.3390136851243917e-05,
"loss": 2.9289,
"step": 15570
},
{
"epoch": 3.36,
"learning_rate": 1.3359185785592757e-05,
"loss": 2.9323,
"step": 15580
},
{
"epoch": 3.37,
"learning_rate": 1.3328257485074327e-05,
"loss": 2.9623,
"step": 15590
},
{
"epoch": 3.37,
"learning_rate": 1.3297352010172958e-05,
"loss": 2.9508,
"step": 15600
},
{
"epoch": 3.37,
"learning_rate": 1.3266469421328348e-05,
"loss": 2.9316,
"step": 15610
},
{
"epoch": 3.37,
"learning_rate": 1.3238694708864063e-05,
"loss": 2.9473,
"step": 15620
},
{
"epoch": 3.37,
"learning_rate": 1.3207855769878247e-05,
"loss": 2.9178,
"step": 15630
},
{
"epoch": 3.38,
"learning_rate": 1.3177039891970777e-05,
"loss": 2.92,
"step": 15640
},
{
"epoch": 3.38,
"learning_rate": 1.314624713540612e-05,
"loss": 2.9723,
"step": 15650
},
{
"epoch": 3.38,
"learning_rate": 1.3118553473018185e-05,
"loss": 2.9538,
"step": 15660
},
{
"epoch": 3.38,
"learning_rate": 1.3087804812871471e-05,
"loss": 2.9101,
"step": 15670
},
{
"epoch": 3.39,
"learning_rate": 1.3057079448578427e-05,
"loss": 2.9597,
"step": 15680
},
{
"epoch": 3.39,
"learning_rate": 1.3026377440226518e-05,
"loss": 2.9673,
"step": 15690
},
{
"epoch": 3.39,
"learning_rate": 1.299569884785753e-05,
"loss": 2.9827,
"step": 15700
},
{
"epoch": 3.39,
"learning_rate": 1.2965043731467458e-05,
"loss": 2.94,
"step": 15710
},
{
"epoch": 3.39,
"learning_rate": 1.2934412151006383e-05,
"loss": 2.9432,
"step": 15720
},
{
"epoch": 3.4,
"learning_rate": 1.290380416637837e-05,
"loss": 2.953,
"step": 15730
},
{
"epoch": 3.4,
"learning_rate": 1.2873219837441322e-05,
"loss": 2.8981,
"step": 15740
},
{
"epoch": 3.4,
"learning_rate": 1.2842659224006895e-05,
"loss": 2.9991,
"step": 15750
},
{
"epoch": 3.4,
"learning_rate": 1.2812122385840359e-05,
"loss": 2.9228,
"step": 15760
},
{
"epoch": 3.4,
"learning_rate": 1.2781609382660477e-05,
"loss": 2.9448,
"step": 15770
},
{
"epoch": 3.41,
"learning_rate": 1.2751120274139439e-05,
"loss": 2.9273,
"step": 15780
},
{
"epoch": 3.41,
"learning_rate": 1.272065511990266e-05,
"loss": 2.9634,
"step": 15790
},
{
"epoch": 3.41,
"learning_rate": 1.2690213979528738e-05,
"loss": 2.9639,
"step": 15800
},
{
"epoch": 3.41,
"learning_rate": 1.2662837534248818e-05,
"loss": 2.9732,
"step": 15810
},
{
"epoch": 3.42,
"learning_rate": 1.2632442184185025e-05,
"loss": 2.9421,
"step": 15820
},
{
"epoch": 3.42,
"learning_rate": 1.2602071020496015e-05,
"loss": 2.9071,
"step": 15830
},
{
"epoch": 3.42,
"learning_rate": 1.2571724102576548e-05,
"loss": 2.9135,
"step": 15840
},
{
"epoch": 3.42,
"learning_rate": 1.2544432655633575e-05,
"loss": 2.9801,
"step": 15850
},
{
"epoch": 3.42,
"learning_rate": 1.2514131968138954e-05,
"loss": 2.906,
"step": 15860
},
{
"epoch": 3.43,
"learning_rate": 1.2483855698390184e-05,
"loss": 2.9187,
"step": 15870
},
{
"epoch": 3.43,
"learning_rate": 1.2453603905596467e-05,
"loss": 2.9081,
"step": 15880
},
{
"epoch": 3.43,
"learning_rate": 1.2423376648919134e-05,
"loss": 2.9338,
"step": 15890
},
{
"epoch": 3.43,
"learning_rate": 1.2393173987471535e-05,
"loss": 2.913,
"step": 15900
},
{
"epoch": 3.43,
"learning_rate": 1.2362995980318918e-05,
"loss": 2.9236,
"step": 15910
},
{
"epoch": 3.44,
"learning_rate": 1.233284268647832e-05,
"loss": 2.926,
"step": 15920
},
{
"epoch": 3.44,
"learning_rate": 1.2302714164918441e-05,
"loss": 2.9153,
"step": 15930
},
{
"epoch": 3.44,
"learning_rate": 1.2272610474559545e-05,
"loss": 2.967,
"step": 15940
},
{
"epoch": 3.44,
"learning_rate": 1.2242531674273334e-05,
"loss": 2.9465,
"step": 15950
},
{
"epoch": 3.45,
"learning_rate": 1.221247782288282e-05,
"loss": 2.9336,
"step": 15960
},
{
"epoch": 3.45,
"learning_rate": 1.2182448979162237e-05,
"loss": 2.9414,
"step": 15970
},
{
"epoch": 3.45,
"learning_rate": 1.215544444990901e-05,
"loss": 2.9241,
"step": 15980
},
{
"epoch": 3.45,
"learning_rate": 1.2128460267213379e-05,
"loss": 2.9308,
"step": 15990
},
{
"epoch": 3.45,
"learning_rate": 1.2098501757230813e-05,
"loss": 2.9277,
"step": 16000
},
{
"epoch": 3.46,
"learning_rate": 1.2068568477813263e-05,
"loss": 2.8825,
"step": 16010
},
{
"epoch": 3.46,
"learning_rate": 1.203866048749917e-05,
"loss": 2.9375,
"step": 16020
},
{
"epoch": 3.46,
"learning_rate": 1.2008777844777518e-05,
"loss": 2.932,
"step": 16030
},
{
"epoch": 3.46,
"learning_rate": 1.1978920608087719e-05,
"loss": 2.9585,
"step": 16040
},
{
"epoch": 3.47,
"learning_rate": 1.194908883581949e-05,
"loss": 2.9386,
"step": 16050
},
{
"epoch": 3.47,
"learning_rate": 1.1919282586312769e-05,
"loss": 2.9283,
"step": 16060
},
{
"epoch": 3.47,
"learning_rate": 1.1889501917857564e-05,
"loss": 2.9595,
"step": 16070
},
{
"epoch": 3.47,
"learning_rate": 1.1859746888693865e-05,
"loss": 2.9387,
"step": 16080
},
{
"epoch": 3.47,
"learning_rate": 1.1830017557011516e-05,
"loss": 2.9526,
"step": 16090
},
{
"epoch": 3.48,
"learning_rate": 1.1803283177897373e-05,
"loss": 2.9637,
"step": 16100
},
{
"epoch": 3.48,
"learning_rate": 1.1773602831562538e-05,
"loss": 2.936,
"step": 16110
},
{
"epoch": 3.48,
"learning_rate": 1.1743948351175012e-05,
"loss": 2.9064,
"step": 16120
},
{
"epoch": 3.48,
"learning_rate": 1.1714319794728005e-05,
"loss": 2.9379,
"step": 16130
},
{
"epoch": 3.48,
"learning_rate": 1.1684717220164024e-05,
"loss": 2.9365,
"step": 16140
},
{
"epoch": 3.49,
"learning_rate": 1.1655140685374774e-05,
"loss": 2.9373,
"step": 16150
},
{
"epoch": 3.49,
"learning_rate": 1.1625590248201023e-05,
"loss": 2.9487,
"step": 16160
},
{
"epoch": 3.49,
"learning_rate": 1.1599017215970201e-05,
"loss": 2.9576,
"step": 16170
},
{
"epoch": 3.49,
"learning_rate": 1.1569516523434373e-05,
"loss": 2.9318,
"step": 16180
},
{
"epoch": 3.5,
"learning_rate": 1.1540042095963296e-05,
"loss": 2.9212,
"step": 16190
},
{
"epoch": 3.5,
"learning_rate": 1.1510593991198054e-05,
"loss": 2.9402,
"step": 16200
},
{
"epoch": 3.5,
"learning_rate": 1.148117226672826e-05,
"loss": 2.9364,
"step": 16210
},
{
"epoch": 3.5,
"learning_rate": 1.1451776980091939e-05,
"loss": 2.923,
"step": 16220
},
{
"epoch": 3.5,
"learning_rate": 1.1422408188775415e-05,
"loss": 2.8843,
"step": 16230
},
{
"epoch": 3.51,
"learning_rate": 1.1393065950213197e-05,
"loss": 2.9349,
"step": 16240
},
{
"epoch": 3.51,
"learning_rate": 1.1363750321787864e-05,
"loss": 2.9562,
"step": 16250
},
{
"epoch": 3.51,
"learning_rate": 1.1334461360829953e-05,
"loss": 2.924,
"step": 16260
},
{
"epoch": 3.51,
"learning_rate": 1.1305199124617861e-05,
"loss": 2.9572,
"step": 16270
},
{
"epoch": 3.51,
"learning_rate": 1.1275963670377705e-05,
"loss": 2.9174,
"step": 16280
},
{
"epoch": 3.52,
"learning_rate": 1.1246755055283239e-05,
"loss": 2.9054,
"step": 16290
},
{
"epoch": 3.52,
"learning_rate": 1.1220490296379584e-05,
"loss": 2.9472,
"step": 16300
},
{
"epoch": 3.52,
"learning_rate": 1.1194247365057823e-05,
"loss": 2.9441,
"step": 16310
},
{
"epoch": 3.52,
"learning_rate": 1.116511420328843e-05,
"loss": 2.9758,
"step": 16320
},
{
"epoch": 3.53,
"learning_rate": 1.1136008097445342e-05,
"loss": 2.9287,
"step": 16330
},
{
"epoch": 3.53,
"learning_rate": 1.1106929104449345e-05,
"loss": 2.9197,
"step": 16340
},
{
"epoch": 3.53,
"learning_rate": 1.1077877281168211e-05,
"loss": 2.9143,
"step": 16350
},
{
"epoch": 3.53,
"learning_rate": 1.1048852684416573e-05,
"loss": 2.9572,
"step": 16360
},
{
"epoch": 3.53,
"learning_rate": 1.1019855370955822e-05,
"loss": 2.9901,
"step": 16370
},
{
"epoch": 3.54,
"learning_rate": 1.0990885397493994e-05,
"loss": 2.9351,
"step": 16380
},
{
"epoch": 3.54,
"learning_rate": 1.0961942820685648e-05,
"loss": 2.9827,
"step": 16390
},
{
"epoch": 3.54,
"learning_rate": 1.0933027697131784e-05,
"loss": 2.9031,
"step": 16400
},
{
"epoch": 3.54,
"learning_rate": 1.0904140083379705e-05,
"loss": 2.9746,
"step": 16410
},
{
"epoch": 3.54,
"learning_rate": 1.0875280035922902e-05,
"loss": 2.9824,
"step": 16420
},
{
"epoch": 3.55,
"learning_rate": 1.0846447611200977e-05,
"loss": 2.9189,
"step": 16430
},
{
"epoch": 3.55,
"learning_rate": 1.0817642865599496e-05,
"loss": 2.9104,
"step": 16440
},
{
"epoch": 3.55,
"learning_rate": 1.0788865855449904e-05,
"loss": 2.9574,
"step": 16450
},
{
"epoch": 3.55,
"learning_rate": 1.0760116637029405e-05,
"loss": 2.8827,
"step": 16460
},
{
"epoch": 3.56,
"learning_rate": 1.0731395266560848e-05,
"loss": 2.9355,
"step": 16470
},
{
"epoch": 3.56,
"learning_rate": 1.0702701800212625e-05,
"loss": 2.9361,
"step": 16480
},
{
"epoch": 3.56,
"learning_rate": 1.0674036294098559e-05,
"loss": 2.935,
"step": 16490
},
{
"epoch": 3.56,
"learning_rate": 1.0651124058249481e-05,
"loss": 2.954,
"step": 16500
},
{
"epoch": 3.56,
"learning_rate": 1.0622509021789118e-05,
"loss": 2.934,
"step": 16510
},
{
"epoch": 3.57,
"learning_rate": 1.0593922102390363e-05,
"loss": 2.8717,
"step": 16520
},
{
"epoch": 3.57,
"learning_rate": 1.0565363355958673e-05,
"loss": 2.9195,
"step": 16530
},
{
"epoch": 3.57,
"learning_rate": 1.0536832838344405e-05,
"loss": 2.9556,
"step": 16540
},
{
"epoch": 3.57,
"learning_rate": 1.0508330605342715e-05,
"loss": 2.9069,
"step": 16550
},
{
"epoch": 3.58,
"learning_rate": 1.047985671269344e-05,
"loss": 2.9519,
"step": 16560
},
{
"epoch": 3.58,
"learning_rate": 1.0451411216081e-05,
"loss": 2.9561,
"step": 16570
},
{
"epoch": 3.58,
"learning_rate": 1.0422994171134268e-05,
"loss": 2.9446,
"step": 16580
},
{
"epoch": 3.58,
"learning_rate": 1.0394605633426502e-05,
"loss": 2.931,
"step": 16590
},
{
"epoch": 3.58,
"learning_rate": 1.0366245658475192e-05,
"loss": 3.0008,
"step": 16600
},
{
"epoch": 3.59,
"learning_rate": 1.0337914301741974e-05,
"loss": 2.9501,
"step": 16610
},
{
"epoch": 3.59,
"learning_rate": 1.030961161863251e-05,
"loss": 2.9523,
"step": 16620
},
{
"epoch": 3.59,
"learning_rate": 1.0281337664496402e-05,
"loss": 2.9458,
"step": 16630
},
{
"epoch": 3.59,
"learning_rate": 1.0253092494627053e-05,
"loss": 2.9615,
"step": 16640
},
{
"epoch": 3.59,
"learning_rate": 1.0227696497947367e-05,
"loss": 2.8976,
"step": 16650
},
{
"epoch": 3.6,
"learning_rate": 1.0199506170316519e-05,
"loss": 2.9351,
"step": 16660
},
{
"epoch": 3.6,
"learning_rate": 1.0174159621254309e-05,
"loss": 2.9665,
"step": 16670
},
{
"epoch": 3.6,
"learning_rate": 1.01460243348807e-05,
"loss": 2.9773,
"step": 16680
},
{
"epoch": 3.6,
"learning_rate": 1.0117918097396742e-05,
"loss": 2.9382,
"step": 16690
},
{
"epoch": 3.61,
"learning_rate": 1.0089840963767857e-05,
"loss": 2.9561,
"step": 16700
},
{
"epoch": 3.61,
"learning_rate": 1.0061792988902549e-05,
"loss": 2.9345,
"step": 16710
},
{
"epoch": 3.61,
"learning_rate": 1.003377422765229e-05,
"loss": 2.917,
"step": 16720
},
{
"epoch": 3.61,
"learning_rate": 1.0005784734811432e-05,
"loss": 2.9242,
"step": 16730
},
{
"epoch": 3.61,
"learning_rate": 9.977824565117086e-06,
"loss": 2.8672,
"step": 16740
},
{
"epoch": 3.62,
"learning_rate": 9.94989377324901e-06,
"loss": 2.925,
"step": 16750
},
{
"epoch": 3.62,
"learning_rate": 9.921992413829523e-06,
"loss": 2.9634,
"step": 16760
},
{
"epoch": 3.62,
"learning_rate": 9.89412054142337e-06,
"loss": 2.9177,
"step": 16770
},
{
"epoch": 3.62,
"learning_rate": 9.869061112705533e-06,
"loss": 2.9109,
"step": 16780
},
{
"epoch": 3.62,
"learning_rate": 9.841245415744002e-06,
"loss": 2.9186,
"step": 16790
},
{
"epoch": 3.63,
"learning_rate": 9.81623663334007e-06,
"loss": 2.9163,
"step": 16800
},
{
"epoch": 3.63,
"learning_rate": 9.791251902941274e-06,
"loss": 2.934,
"step": 16810
},
{
"epoch": 3.63,
"learning_rate": 9.763519348664627e-06,
"loss": 2.9066,
"step": 16820
},
{
"epoch": 3.63,
"learning_rate": 9.735816591316102e-06,
"loss": 2.9567,
"step": 16830
},
{
"epoch": 3.64,
"learning_rate": 9.708143685072052e-06,
"loss": 2.8996,
"step": 16840
},
{
"epoch": 3.64,
"learning_rate": 9.68050068405046e-06,
"loss": 2.9259,
"step": 16850
},
{
"epoch": 3.64,
"learning_rate": 9.652887642310832e-06,
"loss": 2.9475,
"step": 16860
},
{
"epoch": 3.64,
"learning_rate": 9.625304613854072e-06,
"loss": 2.8981,
"step": 16870
},
{
"epoch": 3.64,
"learning_rate": 9.597751652622397e-06,
"loss": 2.9046,
"step": 16880
},
{
"epoch": 3.65,
"learning_rate": 9.570228812499221e-06,
"loss": 2.9106,
"step": 16890
},
{
"epoch": 3.65,
"learning_rate": 9.542736147309053e-06,
"loss": 2.9376,
"step": 16900
},
{
"epoch": 3.65,
"learning_rate": 9.515273710817382e-06,
"loss": 2.9496,
"step": 16910
},
{
"epoch": 3.65,
"learning_rate": 9.487841556730605e-06,
"loss": 2.9224,
"step": 16920
},
{
"epoch": 3.66,
"learning_rate": 9.460439738695882e-06,
"loss": 2.9252,
"step": 16930
},
{
"epoch": 3.66,
"learning_rate": 9.433068310301039e-06,
"loss": 2.8755,
"step": 16940
},
{
"epoch": 3.66,
"learning_rate": 9.405727325074482e-06,
"loss": 2.9732,
"step": 16950
},
{
"epoch": 3.66,
"learning_rate": 9.37841683648508e-06,
"loss": 2.9543,
"step": 16960
},
{
"epoch": 3.66,
"learning_rate": 9.35113689794206e-06,
"loss": 2.9299,
"step": 16970
},
{
"epoch": 3.67,
"learning_rate": 9.323887562794903e-06,
"loss": 2.9776,
"step": 16980
},
{
"epoch": 3.67,
"learning_rate": 9.296668884333245e-06,
"loss": 2.8907,
"step": 16990
},
{
"epoch": 3.67,
"learning_rate": 9.269480915786771e-06,
"loss": 2.8901,
"step": 17000
},
{
"epoch": 3.67,
"learning_rate": 9.242323710325098e-06,
"loss": 2.938,
"step": 17010
},
{
"epoch": 3.67,
"learning_rate": 9.215197321057694e-06,
"loss": 2.9429,
"step": 17020
},
{
"epoch": 3.68,
"learning_rate": 9.188101801033753e-06,
"loss": 2.97,
"step": 17030
},
{
"epoch": 3.68,
"learning_rate": 9.161037203242107e-06,
"loss": 2.9537,
"step": 17040
},
{
"epoch": 3.68,
"learning_rate": 9.134003580611108e-06,
"loss": 2.9431,
"step": 17050
},
{
"epoch": 3.68,
"learning_rate": 9.10969984770203e-06,
"loss": 2.8524,
"step": 17060
},
{
"epoch": 3.69,
"learning_rate": 9.082725223476848e-06,
"loss": 2.9433,
"step": 17070
},
{
"epoch": 3.69,
"learning_rate": 9.055781727561624e-06,
"loss": 2.9438,
"step": 17080
},
{
"epoch": 3.69,
"learning_rate": 9.031559239493739e-06,
"loss": 2.9048,
"step": 17090
},
{
"epoch": 3.69,
"learning_rate": 9.004675032482182e-06,
"loss": 2.9177,
"step": 17100
},
{
"epoch": 3.69,
"learning_rate": 8.977822106417927e-06,
"loss": 2.9222,
"step": 17110
},
{
"epoch": 3.7,
"learning_rate": 8.951000513815364e-06,
"loss": 2.9498,
"step": 17120
},
{
"epoch": 3.7,
"learning_rate": 8.924210307127639e-06,
"loss": 2.9337,
"step": 17130
},
{
"epoch": 3.7,
"learning_rate": 8.897451538746495e-06,
"loss": 2.9521,
"step": 17140
},
{
"epoch": 3.7,
"learning_rate": 8.870724261002197e-06,
"loss": 2.9441,
"step": 17150
},
{
"epoch": 3.7,
"learning_rate": 8.844028526163425e-06,
"loss": 2.9411,
"step": 17160
},
{
"epoch": 3.71,
"learning_rate": 8.817364386437179e-06,
"loss": 2.9277,
"step": 17170
},
{
"epoch": 3.71,
"learning_rate": 8.790731893968668e-06,
"loss": 2.9238,
"step": 17180
},
{
"epoch": 3.71,
"learning_rate": 8.764131100841208e-06,
"loss": 2.9242,
"step": 17190
},
{
"epoch": 3.71,
"learning_rate": 8.740217532960001e-06,
"loss": 2.9047,
"step": 17200
},
{
"epoch": 3.72,
"learning_rate": 8.713677111847971e-06,
"loss": 2.9341,
"step": 17210
},
{
"epoch": 3.72,
"learning_rate": 8.687168540767681e-06,
"loss": 2.9459,
"step": 17220
},
{
"epoch": 3.72,
"learning_rate": 8.660691871560108e-06,
"loss": 2.8953,
"step": 17230
},
{
"epoch": 3.72,
"learning_rate": 8.634247156003836e-06,
"loss": 2.9709,
"step": 17240
},
{
"epoch": 3.72,
"learning_rate": 8.610474275119702e-06,
"loss": 2.9353,
"step": 17250
},
{
"epoch": 3.73,
"learning_rate": 8.584090413927014e-06,
"loss": 2.9058,
"step": 17260
},
{
"epoch": 3.73,
"learning_rate": 8.557738656189784e-06,
"loss": 2.9041,
"step": 17270
},
{
"epoch": 3.73,
"learning_rate": 8.531419053442315e-06,
"loss": 2.9232,
"step": 17280
},
{
"epoch": 3.73,
"learning_rate": 8.505131657156032e-06,
"loss": 2.8975,
"step": 17290
},
{
"epoch": 3.73,
"learning_rate": 8.478876518739364e-06,
"loss": 2.9012,
"step": 17300
},
{
"epoch": 3.74,
"learning_rate": 8.45265368953767e-06,
"loss": 2.9072,
"step": 17310
},
{
"epoch": 3.74,
"learning_rate": 8.426463220833109e-06,
"loss": 2.9171,
"step": 17320
},
{
"epoch": 3.74,
"learning_rate": 8.400305163844577e-06,
"loss": 2.9552,
"step": 17330
},
{
"epoch": 3.74,
"learning_rate": 8.374179569727563e-06,
"loss": 2.9531,
"step": 17340
},
{
"epoch": 3.75,
"learning_rate": 8.348086489574084e-06,
"loss": 2.917,
"step": 17350
},
{
"epoch": 3.75,
"learning_rate": 8.32202597441256e-06,
"loss": 2.9467,
"step": 17360
},
{
"epoch": 3.75,
"learning_rate": 8.295998075207736e-06,
"loss": 2.911,
"step": 17370
},
{
"epoch": 3.75,
"learning_rate": 8.270002842860569e-06,
"loss": 2.9084,
"step": 17380
},
{
"epoch": 3.75,
"learning_rate": 8.246635105929579e-06,
"loss": 2.9196,
"step": 17390
},
{
"epoch": 3.76,
"learning_rate": 8.220702080615212e-06,
"loss": 2.956,
"step": 17400
},
{
"epoch": 3.76,
"learning_rate": 8.194801869409655e-06,
"loss": 2.9453,
"step": 17410
},
{
"epoch": 3.76,
"learning_rate": 8.16893452296415e-06,
"loss": 2.9495,
"step": 17420
},
{
"epoch": 3.76,
"learning_rate": 8.143100091865672e-06,
"loss": 2.9126,
"step": 17430
},
{
"epoch": 3.77,
"learning_rate": 8.117298626636832e-06,
"loss": 2.8955,
"step": 17440
},
{
"epoch": 3.77,
"learning_rate": 8.091530177735762e-06,
"loss": 2.9386,
"step": 17450
},
{
"epoch": 3.77,
"learning_rate": 8.065794795556031e-06,
"loss": 2.9046,
"step": 17460
},
{
"epoch": 3.77,
"learning_rate": 8.040092530426537e-06,
"loss": 2.958,
"step": 17470
},
{
"epoch": 3.77,
"learning_rate": 8.0169888484326e-06,
"loss": 2.9561,
"step": 17480
},
{
"epoch": 3.78,
"learning_rate": 7.99134964412265e-06,
"loss": 2.9098,
"step": 17490
},
{
"epoch": 3.78,
"learning_rate": 7.965743702450158e-06,
"loss": 2.9147,
"step": 17500
},
{
"epoch": 3.78,
"learning_rate": 7.940171073490885e-06,
"loss": 2.9131,
"step": 17510
},
{
"epoch": 3.78,
"learning_rate": 7.914631807255448e-06,
"loss": 2.9006,
"step": 17520
},
{
"epoch": 3.78,
"learning_rate": 7.889125953689214e-06,
"loss": 2.9673,
"step": 17530
},
{
"epoch": 3.79,
"learning_rate": 7.863653562672222e-06,
"loss": 2.8807,
"step": 17540
},
{
"epoch": 3.79,
"learning_rate": 7.838214684019052e-06,
"loss": 2.9496,
"step": 17550
},
{
"epoch": 3.79,
"learning_rate": 7.812809367478758e-06,
"loss": 2.9407,
"step": 17560
},
{
"epoch": 3.79,
"learning_rate": 7.78743766273475e-06,
"loss": 2.9375,
"step": 17570
},
{
"epoch": 3.8,
"learning_rate": 7.762099619404713e-06,
"loss": 2.9029,
"step": 17580
},
{
"epoch": 3.8,
"learning_rate": 7.736795287040504e-06,
"loss": 2.9475,
"step": 17590
},
{
"epoch": 3.8,
"learning_rate": 7.711524715128047e-06,
"loss": 2.9779,
"step": 17600
},
{
"epoch": 3.8,
"learning_rate": 7.68881010644e-06,
"loss": 2.9201,
"step": 17610
},
{
"epoch": 3.8,
"learning_rate": 7.663603815483061e-06,
"loss": 2.8796,
"step": 17620
},
{
"epoch": 3.81,
"learning_rate": 7.63843142811335e-06,
"loss": 2.9633,
"step": 17630
},
{
"epoch": 3.81,
"learning_rate": 7.613292993558771e-06,
"loss": 2.8935,
"step": 17640
},
{
"epoch": 3.81,
"learning_rate": 7.588188560980814e-06,
"loss": 2.913,
"step": 17650
},
{
"epoch": 3.81,
"learning_rate": 7.563118179474471e-06,
"loss": 2.9524,
"step": 17660
},
{
"epoch": 3.81,
"learning_rate": 7.53808189806815e-06,
"loss": 2.9146,
"step": 17670
},
{
"epoch": 3.82,
"learning_rate": 7.515578440856219e-06,
"loss": 2.8998,
"step": 17680
},
{
"epoch": 3.82,
"learning_rate": 7.49060708447428e-06,
"loss": 2.9198,
"step": 17690
},
{
"epoch": 3.82,
"learning_rate": 7.465669969997282e-06,
"loss": 2.9823,
"step": 17700
},
{
"epoch": 3.82,
"learning_rate": 7.440767146193006e-06,
"loss": 2.9825,
"step": 17710
},
{
"epoch": 3.83,
"learning_rate": 7.41589866176218e-06,
"loss": 2.9456,
"step": 17720
},
{
"epoch": 3.83,
"learning_rate": 7.3910645653383706e-06,
"loss": 2.8956,
"step": 17730
},
{
"epoch": 3.83,
"learning_rate": 7.366264905487894e-06,
"loss": 2.9042,
"step": 17740
},
{
"epoch": 3.83,
"learning_rate": 7.341499730709736e-06,
"loss": 2.9224,
"step": 17750
},
{
"epoch": 3.83,
"learning_rate": 7.316769089435424e-06,
"loss": 2.8999,
"step": 17760
},
{
"epoch": 3.84,
"learning_rate": 7.292073030028959e-06,
"loss": 2.9098,
"step": 17770
},
{
"epoch": 3.84,
"learning_rate": 7.267411600786708e-06,
"loss": 2.9371,
"step": 17780
},
{
"epoch": 3.84,
"learning_rate": 7.24278484993732e-06,
"loss": 2.9765,
"step": 17790
},
{
"epoch": 3.84,
"learning_rate": 7.2181928256416235e-06,
"loss": 2.8928,
"step": 17800
},
{
"epoch": 3.84,
"learning_rate": 7.19363557599253e-06,
"loss": 2.9775,
"step": 17810
},
{
"epoch": 3.85,
"learning_rate": 7.1691131490149525e-06,
"loss": 2.8933,
"step": 17820
},
{
"epoch": 3.85,
"learning_rate": 7.144625592665697e-06,
"loss": 2.9352,
"step": 17830
},
{
"epoch": 3.85,
"learning_rate": 7.125060686622132e-06,
"loss": 2.916,
"step": 17840
},
{
"epoch": 3.85,
"learning_rate": 7.100636018037285e-06,
"loss": 2.9659,
"step": 17850
},
{
"epoch": 3.86,
"learning_rate": 7.0762463539967376e-06,
"loss": 2.9219,
"step": 17860
},
{
"epoch": 3.86,
"learning_rate": 7.051891742197661e-06,
"loss": 2.9151,
"step": 17870
},
{
"epoch": 3.86,
"learning_rate": 7.027572230268681e-06,
"loss": 2.9374,
"step": 17880
},
{
"epoch": 3.86,
"learning_rate": 7.003287865769781e-06,
"loss": 2.9011,
"step": 17890
},
{
"epoch": 3.86,
"learning_rate": 6.9790386961922055e-06,
"loss": 2.9009,
"step": 17900
},
{
"epoch": 3.87,
"learning_rate": 6.954824768958376e-06,
"loss": 2.9311,
"step": 17910
},
{
"epoch": 3.87,
"learning_rate": 6.930646131421786e-06,
"loss": 2.9133,
"step": 17920
},
{
"epoch": 3.87,
"learning_rate": 6.906502830866923e-06,
"loss": 2.9228,
"step": 17930
},
{
"epoch": 3.87,
"learning_rate": 6.882394914509158e-06,
"loss": 2.9297,
"step": 17940
},
{
"epoch": 3.88,
"learning_rate": 6.858322429494679e-06,
"loss": 2.8734,
"step": 17950
},
{
"epoch": 3.88,
"learning_rate": 6.839089983688265e-06,
"loss": 2.8926,
"step": 17960
},
{
"epoch": 3.88,
"learning_rate": 6.815081393678835e-06,
"loss": 2.9483,
"step": 17970
},
{
"epoch": 3.88,
"learning_rate": 6.7911083666530736e-06,
"loss": 2.9492,
"step": 17980
},
{
"epoch": 3.88,
"learning_rate": 6.767170949493359e-06,
"loss": 2.9161,
"step": 17990
},
{
"epoch": 3.89,
"learning_rate": 6.743269189012438e-06,
"loss": 2.9492,
"step": 18000
},
{
"epoch": 3.89,
"learning_rate": 6.7194031319533364e-06,
"loss": 2.923,
"step": 18010
},
{
"epoch": 3.89,
"learning_rate": 6.695572824989235e-06,
"loss": 2.9469,
"step": 18020
},
{
"epoch": 3.89,
"learning_rate": 6.6717783147234205e-06,
"loss": 2.9349,
"step": 18030
},
{
"epoch": 3.89,
"learning_rate": 6.648019647689157e-06,
"loss": 2.9467,
"step": 18040
},
{
"epoch": 3.9,
"learning_rate": 6.624296870349627e-06,
"loss": 2.9366,
"step": 18050
},
{
"epoch": 3.9,
"learning_rate": 6.60061002909782e-06,
"loss": 2.9608,
"step": 18060
},
{
"epoch": 3.9,
"learning_rate": 6.576959170256444e-06,
"loss": 2.9233,
"step": 18070
},
{
"epoch": 3.9,
"learning_rate": 6.5533443400778465e-06,
"loss": 2.9234,
"step": 18080
},
{
"epoch": 3.91,
"learning_rate": 6.52976558474391e-06,
"loss": 2.9073,
"step": 18090
},
{
"epoch": 3.91,
"learning_rate": 6.5062229503659725e-06,
"loss": 2.9385,
"step": 18100
},
{
"epoch": 3.91,
"learning_rate": 6.48271648298473e-06,
"loss": 2.9614,
"step": 18110
},
{
"epoch": 3.91,
"learning_rate": 6.459246228570151e-06,
"loss": 2.9386,
"step": 18120
},
{
"epoch": 3.91,
"learning_rate": 6.435812233021382e-06,
"loss": 3.0033,
"step": 18130
},
{
"epoch": 3.92,
"learning_rate": 6.412414542166662e-06,
"loss": 2.9445,
"step": 18140
},
{
"epoch": 3.92,
"learning_rate": 6.389053201763224e-06,
"loss": 2.9041,
"step": 18150
},
{
"epoch": 3.92,
"learning_rate": 6.368059112797076e-06,
"loss": 2.8788,
"step": 18160
},
{
"epoch": 3.92,
"learning_rate": 6.349422476642508e-06,
"loss": 2.9584,
"step": 18170
},
{
"epoch": 3.92,
"learning_rate": 6.326159510458385e-06,
"loss": 2.9204,
"step": 18180
},
{
"epoch": 3.93,
"learning_rate": 6.302933063408531e-06,
"loss": 2.8974,
"step": 18190
},
{
"epoch": 3.93,
"learning_rate": 6.279743180915301e-06,
"loss": 2.921,
"step": 18200
},
{
"epoch": 3.93,
"learning_rate": 6.256589908329538e-06,
"loss": 2.9497,
"step": 18210
},
{
"epoch": 3.93,
"learning_rate": 6.233473290930492e-06,
"loss": 2.8956,
"step": 18220
},
{
"epoch": 3.94,
"learning_rate": 6.210393373925724e-06,
"loss": 2.8968,
"step": 18230
},
{
"epoch": 3.94,
"learning_rate": 6.187350202451031e-06,
"loss": 2.9228,
"step": 18240
},
{
"epoch": 3.94,
"learning_rate": 6.164343821570343e-06,
"loss": 2.9037,
"step": 18250
},
{
"epoch": 3.94,
"learning_rate": 6.141374276275635e-06,
"loss": 2.9264,
"step": 18260
},
{
"epoch": 3.94,
"learning_rate": 6.1184416114868695e-06,
"loss": 2.9335,
"step": 18270
},
{
"epoch": 3.95,
"learning_rate": 6.095545872051859e-06,
"loss": 2.9021,
"step": 18280
},
{
"epoch": 3.95,
"learning_rate": 6.072687102746211e-06,
"loss": 2.9842,
"step": 18290
},
{
"epoch": 3.95,
"learning_rate": 6.049865348273237e-06,
"loss": 2.9644,
"step": 18300
},
{
"epoch": 3.95,
"learning_rate": 6.027080653263861e-06,
"loss": 2.9817,
"step": 18310
},
{
"epoch": 3.96,
"learning_rate": 6.004333062276527e-06,
"loss": 2.9293,
"step": 18320
},
{
"epoch": 3.96,
"learning_rate": 5.983891991095858e-06,
"loss": 2.9489,
"step": 18330
},
{
"epoch": 3.96,
"learning_rate": 5.961215020248978e-06,
"loss": 2.9413,
"step": 18340
},
{
"epoch": 3.96,
"learning_rate": 5.938575282232988e-06,
"loss": 2.9446,
"step": 18350
},
{
"epoch": 3.96,
"learning_rate": 5.9159728213228485e-06,
"loss": 2.9285,
"step": 18360
},
{
"epoch": 3.97,
"learning_rate": 5.8934076817206366e-06,
"loss": 2.9061,
"step": 18370
},
{
"epoch": 3.97,
"learning_rate": 5.870879907555424e-06,
"loss": 2.8982,
"step": 18380
},
{
"epoch": 3.97,
"learning_rate": 5.8483895428832175e-06,
"loss": 2.9327,
"step": 18390
},
{
"epoch": 3.97,
"learning_rate": 5.82593663168686e-06,
"loss": 2.9725,
"step": 18400
},
{
"epoch": 3.97,
"learning_rate": 5.803521217875951e-06,
"loss": 2.8764,
"step": 18410
},
{
"epoch": 3.98,
"learning_rate": 5.781143345286766e-06,
"loss": 2.9848,
"step": 18420
},
{
"epoch": 3.98,
"learning_rate": 5.763268106306163e-06,
"loss": 2.9213,
"step": 18430
},
{
"epoch": 3.98,
"learning_rate": 5.74095791814907e-06,
"loss": 2.9679,
"step": 18440
},
{
"epoch": 3.98,
"learning_rate": 5.718685393564405e-06,
"loss": 2.983,
"step": 18450
},
{
"epoch": 3.99,
"learning_rate": 5.696450576108994e-06,
"loss": 2.9548,
"step": 18460
},
{
"epoch": 3.99,
"learning_rate": 5.674253509265928e-06,
"loss": 2.9417,
"step": 18470
},
{
"epoch": 3.99,
"learning_rate": 5.652094236444475e-06,
"loss": 2.9665,
"step": 18480
},
{
"epoch": 3.99,
"learning_rate": 5.629972800979982e-06,
"loss": 2.9372,
"step": 18490
},
{
"epoch": 3.99,
"learning_rate": 5.607889246133807e-06,
"loss": 2.862,
"step": 18500
},
{
"epoch": 4.0,
"learning_rate": 5.585843615093228e-06,
"loss": 2.9052,
"step": 18510
},
{
"epoch": 4.0,
"learning_rate": 5.563835950971355e-06,
"loss": 2.8992,
"step": 18520
},
{
"epoch": 4.0,
"eval_loss": 2.925355911254883,
"eval_runtime": 267.2417,
"eval_samples_per_second": 554.629,
"eval_steps_per_second": 17.333,
"step": 18528
},
{
"epoch": 4.0,
"learning_rate": 5.54625718477993e-06,
"loss": 2.9229,
"step": 18530
},
{
"epoch": 4.0,
"learning_rate": 5.526510177827274e-06,
"loss": 2.941,
"step": 18540
},
{
"epoch": 4.0,
"learning_rate": 5.504605239154578e-06,
"loss": 2.9386,
"step": 18550
},
{
"epoch": 4.01,
"learning_rate": 5.482738426272846e-06,
"loss": 2.906,
"step": 18560
},
{
"epoch": 4.01,
"learning_rate": 5.4609097819454805e-06,
"loss": 2.9279,
"step": 18570
},
{
"epoch": 4.01,
"learning_rate": 5.439119348861252e-06,
"loss": 2.9461,
"step": 18580
},
{
"epoch": 4.01,
"learning_rate": 5.4173671696342e-06,
"loss": 2.926,
"step": 18590
},
{
"epoch": 4.02,
"learning_rate": 5.3956532868035454e-06,
"loss": 2.9203,
"step": 18600
},
{
"epoch": 4.02,
"learning_rate": 5.3739777428336275e-06,
"loss": 2.9273,
"step": 18610
},
{
"epoch": 4.02,
"learning_rate": 5.352340580113805e-06,
"loss": 2.9558,
"step": 18620
},
{
"epoch": 4.02,
"learning_rate": 5.33074184095837e-06,
"loss": 2.9251,
"step": 18630
},
{
"epoch": 4.02,
"learning_rate": 5.309181567606483e-06,
"loss": 2.9274,
"step": 18640
},
{
"epoch": 4.03,
"learning_rate": 5.287659802222075e-06,
"loss": 2.9568,
"step": 18650
},
{
"epoch": 4.03,
"learning_rate": 5.266176586893767e-06,
"loss": 2.909,
"step": 18660
},
{
"epoch": 4.03,
"learning_rate": 5.244731963634786e-06,
"loss": 2.9261,
"step": 18670
},
{
"epoch": 4.03,
"learning_rate": 5.225464833584112e-06,
"loss": 2.9317,
"step": 18680
},
{
"epoch": 4.03,
"learning_rate": 5.206229027600465e-06,
"loss": 2.8938,
"step": 18690
},
{
"epoch": 4.04,
"learning_rate": 5.1848926850032955e-06,
"loss": 2.9366,
"step": 18700
},
{
"epoch": 4.04,
"learning_rate": 5.163595093436755e-06,
"loss": 2.9214,
"step": 18710
},
{
"epoch": 4.04,
"learning_rate": 5.142336294551056e-06,
"loss": 2.8898,
"step": 18720
},
{
"epoch": 4.04,
"learning_rate": 5.121116329920564e-06,
"loss": 2.9075,
"step": 18730
},
{
"epoch": 4.05,
"learning_rate": 5.099935241043685e-06,
"loss": 2.9302,
"step": 18740
},
{
"epoch": 4.05,
"learning_rate": 5.0787930693428075e-06,
"loss": 2.9067,
"step": 18750
},
{
"epoch": 4.05,
"learning_rate": 5.057689856164208e-06,
"loss": 2.9363,
"step": 18760
},
{
"epoch": 4.05,
"learning_rate": 5.036625642777978e-06,
"loss": 2.9717,
"step": 18770
},
{
"epoch": 4.05,
"learning_rate": 5.01560047037794e-06,
"loss": 2.9426,
"step": 18780
},
{
"epoch": 4.06,
"learning_rate": 4.994614380081561e-06,
"loss": 2.883,
"step": 18790
},
{
"epoch": 4.06,
"learning_rate": 4.973667412929886e-06,
"loss": 2.9159,
"step": 18800
},
{
"epoch": 4.06,
"learning_rate": 4.952759609887442e-06,
"loss": 2.9153,
"step": 18810
},
{
"epoch": 4.06,
"learning_rate": 4.933976106258104e-06,
"loss": 2.9344,
"step": 18820
},
{
"epoch": 4.07,
"learning_rate": 4.913142827606107e-06,
"loss": 2.9481,
"step": 18830
},
{
"epoch": 4.07,
"learning_rate": 4.894426462174131e-06,
"loss": 2.916,
"step": 18840
},
{
"epoch": 4.07,
"learning_rate": 4.873667854991856e-06,
"loss": 2.878,
"step": 18850
},
{
"epoch": 4.07,
"learning_rate": 4.852948607481012e-06,
"loss": 2.9156,
"step": 18860
},
{
"epoch": 4.07,
"learning_rate": 4.832268760160791e-06,
"loss": 2.9417,
"step": 18870
},
{
"epoch": 4.08,
"learning_rate": 4.81162835347333e-06,
"loss": 2.9027,
"step": 18880
},
{
"epoch": 4.08,
"learning_rate": 4.791027427783656e-06,
"loss": 2.9327,
"step": 18890
},
{
"epoch": 4.08,
"learning_rate": 4.770466023379561e-06,
"loss": 2.9624,
"step": 18900
},
{
"epoch": 4.08,
"learning_rate": 4.7499441804715574e-06,
"loss": 2.9533,
"step": 18910
},
{
"epoch": 4.08,
"learning_rate": 4.729461939192789e-06,
"loss": 2.9412,
"step": 18920
},
{
"epoch": 4.09,
"learning_rate": 4.709019339598958e-06,
"loss": 2.8995,
"step": 18930
},
{
"epoch": 4.09,
"learning_rate": 4.688616421668232e-06,
"loss": 2.949,
"step": 18940
},
{
"epoch": 4.09,
"learning_rate": 4.668253225301186e-06,
"loss": 2.922,
"step": 18950
},
{
"epoch": 4.09,
"learning_rate": 4.647929790320713e-06,
"loss": 2.9252,
"step": 18960
},
{
"epoch": 4.1,
"learning_rate": 4.627646156471938e-06,
"loss": 2.9589,
"step": 18970
},
{
"epoch": 4.1,
"learning_rate": 4.607402363422158e-06,
"loss": 2.8673,
"step": 18980
},
{
"epoch": 4.1,
"learning_rate": 4.58719845076076e-06,
"loss": 2.8975,
"step": 18990
},
{
"epoch": 4.1,
"learning_rate": 4.569049059755334e-06,
"loss": 2.9253,
"step": 19000
},
{
"epoch": 4.1,
"learning_rate": 4.548921028621214e-06,
"loss": 2.9192,
"step": 19010
},
{
"epoch": 4.11,
"learning_rate": 4.528832992243359e-06,
"loss": 2.9645,
"step": 19020
},
{
"epoch": 4.11,
"learning_rate": 4.510787987490386e-06,
"loss": 2.9304,
"step": 19030
},
{
"epoch": 4.11,
"learning_rate": 4.490776049314299e-06,
"loss": 2.8655,
"step": 19040
},
{
"epoch": 4.11,
"learning_rate": 4.470804219604582e-06,
"loss": 2.9641,
"step": 19050
},
{
"epoch": 4.11,
"learning_rate": 4.450872537418754e-06,
"loss": 2.89,
"step": 19060
},
{
"epoch": 4.12,
"learning_rate": 4.430981041735827e-06,
"loss": 2.9563,
"step": 19070
},
{
"epoch": 4.12,
"learning_rate": 4.411129771456218e-06,
"loss": 2.9183,
"step": 19080
},
{
"epoch": 4.12,
"learning_rate": 4.391318765401672e-06,
"loss": 2.8547,
"step": 19090
},
{
"epoch": 4.12,
"learning_rate": 4.3715480623152085e-06,
"loss": 2.9288,
"step": 19100
},
{
"epoch": 4.13,
"learning_rate": 4.3518177008610215e-06,
"loss": 2.9048,
"step": 19110
},
{
"epoch": 4.13,
"learning_rate": 4.332127719624401e-06,
"loss": 2.9307,
"step": 19120
},
{
"epoch": 4.13,
"learning_rate": 4.312478157111682e-06,
"loss": 2.9311,
"step": 19130
},
{
"epoch": 4.13,
"learning_rate": 4.292869051750153e-06,
"loss": 2.8933,
"step": 19140
},
{
"epoch": 4.13,
"learning_rate": 4.273300441887976e-06,
"loss": 2.9427,
"step": 19150
},
{
"epoch": 4.14,
"learning_rate": 4.253772365794126e-06,
"loss": 2.9058,
"step": 19160
},
{
"epoch": 4.14,
"learning_rate": 4.234284861658305e-06,
"loss": 2.9487,
"step": 19170
},
{
"epoch": 4.14,
"learning_rate": 4.214837967590876e-06,
"loss": 2.8779,
"step": 19180
},
{
"epoch": 4.14,
"learning_rate": 4.1954317216227775e-06,
"loss": 2.9407,
"step": 19190
},
{
"epoch": 4.15,
"learning_rate": 4.176066161705461e-06,
"loss": 2.9329,
"step": 19200
},
{
"epoch": 4.15,
"learning_rate": 4.156741325710803e-06,
"loss": 2.9287,
"step": 19210
},
{
"epoch": 4.15,
"learning_rate": 4.141310803538462e-06,
"loss": 2.9632,
"step": 19220
},
{
"epoch": 4.15,
"learning_rate": 4.122059365787479e-06,
"loss": 3.0098,
"step": 19230
},
{
"epoch": 4.15,
"learning_rate": 4.102848757576502e-06,
"loss": 2.9116,
"step": 19240
},
{
"epoch": 4.16,
"learning_rate": 4.083679016474387e-06,
"loss": 2.9411,
"step": 19250
},
{
"epoch": 4.16,
"learning_rate": 4.064550179970064e-06,
"loss": 2.9678,
"step": 19260
},
{
"epoch": 4.16,
"learning_rate": 4.045462285472473e-06,
"loss": 2.9733,
"step": 19270
},
{
"epoch": 4.16,
"learning_rate": 4.026415370310482e-06,
"loss": 2.9219,
"step": 19280
},
{
"epoch": 4.16,
"learning_rate": 4.0074094717328174e-06,
"loss": 2.9283,
"step": 19290
},
{
"epoch": 4.17,
"learning_rate": 3.988444626908008e-06,
"loss": 2.8998,
"step": 19300
},
{
"epoch": 4.17,
"learning_rate": 3.969520872924282e-06,
"loss": 2.8915,
"step": 19310
},
{
"epoch": 4.17,
"learning_rate": 3.950638246789509e-06,
"loss": 2.9534,
"step": 19320
},
{
"epoch": 4.17,
"learning_rate": 3.93179678543113e-06,
"loss": 2.9408,
"step": 19330
},
{
"epoch": 4.18,
"learning_rate": 3.912996525696086e-06,
"loss": 2.9049,
"step": 19340
},
{
"epoch": 4.18,
"learning_rate": 3.894237504350742e-06,
"loss": 2.9489,
"step": 19350
},
{
"epoch": 4.18,
"learning_rate": 3.875519758080803e-06,
"loss": 2.9008,
"step": 19360
},
{
"epoch": 4.18,
"learning_rate": 3.856843323491286e-06,
"loss": 2.9249,
"step": 19370
},
{
"epoch": 4.18,
"learning_rate": 3.840069884036404e-06,
"loss": 2.9293,
"step": 19380
},
{
"epoch": 4.19,
"learning_rate": 3.823329963198441e-06,
"loss": 2.9439,
"step": 19390
},
{
"epoch": 4.19,
"learning_rate": 3.8047693953647045e-06,
"loss": 2.9547,
"step": 19400
},
{
"epoch": 4.19,
"learning_rate": 3.786250277572953e-06,
"loss": 2.9004,
"step": 19410
},
{
"epoch": 4.19,
"learning_rate": 3.767772646039747e-06,
"loss": 2.9355,
"step": 19420
},
{
"epoch": 4.19,
"learning_rate": 3.7511782782785167e-06,
"loss": 2.915,
"step": 19430
},
{
"epoch": 4.2,
"learning_rate": 3.732779570122516e-06,
"loss": 2.9124,
"step": 19440
},
{
"epoch": 4.2,
"learning_rate": 3.714422452794006e-06,
"loss": 2.8911,
"step": 19450
},
{
"epoch": 4.2,
"learning_rate": 3.6961069621927276e-06,
"loss": 2.8343,
"step": 19460
},
{
"epoch": 4.2,
"learning_rate": 3.6778331341370265e-06,
"loss": 2.955,
"step": 19470
},
{
"epoch": 4.21,
"learning_rate": 3.6596010043637497e-06,
"loss": 2.9173,
"step": 19480
},
{
"epoch": 4.21,
"learning_rate": 3.6414106085282148e-06,
"loss": 2.8867,
"step": 19490
},
{
"epoch": 4.21,
"learning_rate": 3.6232619822041096e-06,
"loss": 2.9144,
"step": 19500
},
{
"epoch": 4.21,
"learning_rate": 3.60515516088345e-06,
"loss": 2.9248,
"step": 19510
},
{
"epoch": 4.21,
"learning_rate": 3.587090179976488e-06,
"loss": 2.9245,
"step": 19520
},
{
"epoch": 4.22,
"learning_rate": 3.569067074811655e-06,
"loss": 2.9325,
"step": 19530
},
{
"epoch": 4.22,
"learning_rate": 3.5510858806354824e-06,
"loss": 2.9238,
"step": 19540
},
{
"epoch": 4.22,
"learning_rate": 3.5331466326125495e-06,
"loss": 2.9355,
"step": 19550
},
{
"epoch": 4.22,
"learning_rate": 3.515249365825396e-06,
"loss": 2.9357,
"step": 19560
},
{
"epoch": 4.22,
"learning_rate": 3.4973941152744633e-06,
"loss": 2.9645,
"step": 19570
},
{
"epoch": 4.23,
"learning_rate": 3.4795809158780306e-06,
"loss": 2.9188,
"step": 19580
},
{
"epoch": 4.23,
"learning_rate": 3.461809802472135e-06,
"loss": 2.9546,
"step": 19590
},
{
"epoch": 4.23,
"learning_rate": 3.444080809810507e-06,
"loss": 2.9524,
"step": 19600
},
{
"epoch": 4.23,
"learning_rate": 3.426393972564507e-06,
"loss": 2.9472,
"step": 19610
},
{
"epoch": 4.24,
"learning_rate": 3.4087493253230628e-06,
"loss": 2.9297,
"step": 19620
},
{
"epoch": 4.24,
"learning_rate": 3.391146902592579e-06,
"loss": 2.9287,
"step": 19630
},
{
"epoch": 4.24,
"learning_rate": 3.3735867387968993e-06,
"loss": 2.9313,
"step": 19640
},
{
"epoch": 4.24,
"learning_rate": 3.3560688682772124e-06,
"loss": 2.9533,
"step": 19650
},
{
"epoch": 4.24,
"learning_rate": 3.3403389738768166e-06,
"loss": 2.9395,
"step": 19660
},
{
"epoch": 4.25,
"learning_rate": 3.324643389697521e-06,
"loss": 2.9174,
"step": 19670
},
{
"epoch": 4.25,
"learning_rate": 3.307244122341488e-06,
"loss": 2.9545,
"step": 19680
},
{
"epoch": 4.25,
"learning_rate": 3.289887278003101e-06,
"loss": 2.9632,
"step": 19690
},
{
"epoch": 4.25,
"learning_rate": 3.274302417834721e-06,
"loss": 2.9087,
"step": 19700
},
{
"epoch": 4.26,
"learning_rate": 3.2570262706757863e-06,
"loss": 2.9049,
"step": 19710
},
{
"epoch": 4.26,
"learning_rate": 3.2397926447420706e-06,
"loss": 2.9407,
"step": 19720
},
{
"epoch": 4.26,
"learning_rate": 3.222601573736181e-06,
"loss": 2.9058,
"step": 19730
},
{
"epoch": 4.26,
"learning_rate": 3.2054530912774938e-06,
"loss": 2.9307,
"step": 19740
},
{
"epoch": 4.26,
"learning_rate": 3.1883472309021113e-06,
"loss": 2.9102,
"step": 19750
},
{
"epoch": 4.27,
"learning_rate": 3.171284026062779e-06,
"loss": 2.8462,
"step": 19760
},
{
"epoch": 4.27,
"learning_rate": 3.154263510128819e-06,
"loss": 2.8929,
"step": 19770
},
{
"epoch": 4.27,
"learning_rate": 3.1372857163860762e-06,
"loss": 2.9322,
"step": 19780
},
{
"epoch": 4.27,
"learning_rate": 3.120350678036843e-06,
"loss": 2.9085,
"step": 19790
},
{
"epoch": 4.27,
"learning_rate": 3.1034584281997985e-06,
"loss": 2.9083,
"step": 19800
},
{
"epoch": 4.28,
"learning_rate": 3.086608999909946e-06,
"loss": 2.9724,
"step": 19810
},
{
"epoch": 4.28,
"learning_rate": 3.0698024261185397e-06,
"loss": 2.9246,
"step": 19820
},
{
"epoch": 4.28,
"learning_rate": 3.0530387396930316e-06,
"loss": 2.9397,
"step": 19830
},
{
"epoch": 4.28,
"learning_rate": 3.0363179734170023e-06,
"loss": 2.8887,
"step": 19840
},
{
"epoch": 4.29,
"learning_rate": 3.019640159990092e-06,
"loss": 2.9137,
"step": 19850
},
{
"epoch": 4.29,
"learning_rate": 3.0030053320279417e-06,
"loss": 2.9107,
"step": 19860
},
{
"epoch": 4.29,
"learning_rate": 2.986413522062134e-06,
"loss": 2.8915,
"step": 19870
},
{
"epoch": 4.29,
"learning_rate": 2.96986476254012e-06,
"loss": 2.9252,
"step": 19880
},
{
"epoch": 4.29,
"learning_rate": 2.953359085825158e-06,
"loss": 2.9419,
"step": 19890
},
{
"epoch": 4.3,
"learning_rate": 2.9368965241962587e-06,
"loss": 2.917,
"step": 19900
},
{
"epoch": 4.3,
"learning_rate": 2.9204771098481075e-06,
"loss": 2.8856,
"step": 19910
},
{
"epoch": 4.3,
"learning_rate": 2.9041008748910108e-06,
"loss": 2.8807,
"step": 19920
},
{
"epoch": 4.3,
"learning_rate": 2.8893992082801107e-06,
"loss": 2.9318,
"step": 19930
},
{
"epoch": 4.3,
"learning_rate": 2.8731051023274504e-06,
"loss": 2.9485,
"step": 19940
},
{
"epoch": 4.31,
"learning_rate": 2.856854268408002e-06,
"loss": 2.9252,
"step": 19950
},
{
"epoch": 4.31,
"learning_rate": 2.8406467383023856e-06,
"loss": 2.9225,
"step": 19960
},
{
"epoch": 4.31,
"learning_rate": 2.8244825437065497e-06,
"loss": 2.901,
"step": 19970
},
{
"epoch": 4.31,
"learning_rate": 2.808361716231689e-06,
"loss": 2.9192,
"step": 19980
},
{
"epoch": 4.32,
"learning_rate": 2.7938900764510472e-06,
"loss": 2.9345,
"step": 19990
},
{
"epoch": 4.32,
"learning_rate": 2.777851733291073e-06,
"loss": 2.934,
"step": 20000
},
{
"epoch": 4.32,
"learning_rate": 2.7618568484447134e-06,
"loss": 2.9118,
"step": 20010
},
{
"epoch": 4.32,
"learning_rate": 2.7459054531920546e-06,
"loss": 2.9116,
"step": 20020
},
{
"epoch": 4.32,
"learning_rate": 2.7299975787281303e-06,
"loss": 2.8971,
"step": 20030
},
{
"epoch": 4.33,
"learning_rate": 2.714133256162871e-06,
"loss": 2.9548,
"step": 20040
},
{
"epoch": 4.33,
"learning_rate": 2.6983125165210288e-06,
"loss": 2.9026,
"step": 20050
},
{
"epoch": 4.33,
"learning_rate": 2.6825353907421275e-06,
"loss": 2.9684,
"step": 20060
},
{
"epoch": 4.33,
"learning_rate": 2.666801909680397e-06,
"loss": 2.9668,
"step": 20070
},
{
"epoch": 4.34,
"learning_rate": 2.6511121041047168e-06,
"loss": 2.9276,
"step": 20080
},
{
"epoch": 4.34,
"learning_rate": 2.637028646988826e-06,
"loss": 2.8787,
"step": 20090
},
{
"epoch": 4.34,
"learning_rate": 2.621421909298938e-06,
"loss": 2.9215,
"step": 20100
},
{
"epoch": 4.34,
"learning_rate": 2.605858935841618e-06,
"loss": 2.8808,
"step": 20110
},
{
"epoch": 4.34,
"learning_rate": 2.5903397570522875e-06,
"loss": 2.9303,
"step": 20120
},
{
"epoch": 4.35,
"learning_rate": 2.5748644032807383e-06,
"loss": 2.9384,
"step": 20130
},
{
"epoch": 4.35,
"learning_rate": 2.5594329047910398e-06,
"loss": 2.8845,
"step": 20140
},
{
"epoch": 4.35,
"learning_rate": 2.5440452917615044e-06,
"loss": 2.8979,
"step": 20150
},
{
"epoch": 4.35,
"learning_rate": 2.528701594284619e-06,
"loss": 2.9206,
"step": 20160
},
{
"epoch": 4.35,
"learning_rate": 2.513401842366986e-06,
"loss": 2.8658,
"step": 20170
},
{
"epoch": 4.36,
"learning_rate": 2.4981460659292718e-06,
"loss": 2.9256,
"step": 20180
},
{
"epoch": 4.36,
"learning_rate": 2.4859731271764277e-06,
"loss": 2.9432,
"step": 20190
},
{
"epoch": 4.36,
"learning_rate": 2.4723122541159603e-06,
"loss": 2.9694,
"step": 20200
},
{
"epoch": 4.36,
"learning_rate": 2.4571753602545515e-06,
"loss": 2.8911,
"step": 20210
},
{
"epoch": 4.37,
"learning_rate": 2.4420825518312847e-06,
"loss": 2.9635,
"step": 20220
},
{
"epoch": 4.37,
"learning_rate": 2.4270338583621295e-06,
"loss": 2.9206,
"step": 20230
},
{
"epoch": 4.37,
"learning_rate": 2.412029309276767e-06,
"loss": 2.9096,
"step": 20240
},
{
"epoch": 4.37,
"learning_rate": 2.3970689339185502e-06,
"loss": 2.9257,
"step": 20250
},
{
"epoch": 4.37,
"learning_rate": 2.382152761544451e-06,
"loss": 2.9143,
"step": 20260
},
{
"epoch": 4.38,
"learning_rate": 2.367280821324988e-06,
"loss": 2.9021,
"step": 20270
},
{
"epoch": 4.38,
"learning_rate": 2.352453142344188e-06,
"loss": 2.9122,
"step": 20280
},
{
"epoch": 4.38,
"learning_rate": 2.3376697535995112e-06,
"loss": 2.9325,
"step": 20290
},
{
"epoch": 4.38,
"learning_rate": 2.322930684001806e-06,
"loss": 2.9171,
"step": 20300
},
{
"epoch": 4.38,
"learning_rate": 2.308235962375249e-06,
"loss": 2.9461,
"step": 20310
},
{
"epoch": 4.39,
"learning_rate": 2.293585617457286e-06,
"loss": 2.912,
"step": 20320
},
{
"epoch": 4.39,
"learning_rate": 2.278979677898574e-06,
"loss": 2.9361,
"step": 20330
},
{
"epoch": 4.39,
"learning_rate": 2.2644181722629497e-06,
"loss": 2.9272,
"step": 20340
},
{
"epoch": 4.39,
"learning_rate": 2.249901129027332e-06,
"loss": 2.9205,
"step": 20350
},
{
"epoch": 4.4,
"learning_rate": 2.2354285765816966e-06,
"loss": 2.9243,
"step": 20360
},
{
"epoch": 4.4,
"learning_rate": 2.221000543229007e-06,
"loss": 2.8814,
"step": 20370
},
{
"epoch": 4.4,
"learning_rate": 2.208053400358304e-06,
"loss": 2.9423,
"step": 20380
},
{
"epoch": 4.4,
"learning_rate": 2.1937100309450288e-06,
"loss": 2.8944,
"step": 20390
},
{
"epoch": 4.4,
"learning_rate": 2.1794112622107675e-06,
"loss": 2.8914,
"step": 20400
},
{
"epoch": 4.41,
"learning_rate": 2.16515712211863e-06,
"loss": 2.9246,
"step": 20410
},
{
"epoch": 4.41,
"learning_rate": 2.1523665765659034e-06,
"loss": 2.9193,
"step": 20420
},
{
"epoch": 4.41,
"learning_rate": 2.1381973076195413e-06,
"loss": 2.9054,
"step": 20430
},
{
"epoch": 4.41,
"learning_rate": 2.1240727479145737e-06,
"loss": 2.9312,
"step": 20440
},
{
"epoch": 4.41,
"learning_rate": 2.10999292507342e-06,
"loss": 2.9516,
"step": 20450
},
{
"epoch": 4.42,
"learning_rate": 2.095957866631018e-06,
"loss": 2.9091,
"step": 20460
},
{
"epoch": 4.42,
"learning_rate": 2.0819676000347567e-06,
"loss": 2.9552,
"step": 20470
},
{
"epoch": 4.42,
"learning_rate": 2.068022152644425e-06,
"loss": 2.8603,
"step": 20480
},
{
"epoch": 4.42,
"learning_rate": 2.0541215517321718e-06,
"loss": 2.9328,
"step": 20490
},
{
"epoch": 4.43,
"learning_rate": 2.0402658244824347e-06,
"loss": 2.889,
"step": 20500
},
{
"epoch": 4.43,
"learning_rate": 2.0264549979919003e-06,
"loss": 2.9359,
"step": 20510
},
{
"epoch": 4.43,
"learning_rate": 2.014063666624186e-06,
"loss": 2.9313,
"step": 20520
},
{
"epoch": 4.43,
"learning_rate": 2.000338225912968e-06,
"loss": 2.9239,
"step": 20530
},
{
"epoch": 4.43,
"learning_rate": 1.9866577640445887e-06,
"loss": 2.9153,
"step": 20540
},
{
"epoch": 4.44,
"learning_rate": 1.9730223077729757e-06,
"loss": 2.937,
"step": 20550
},
{
"epoch": 4.44,
"learning_rate": 1.9594318837640457e-06,
"loss": 2.9189,
"step": 20560
},
{
"epoch": 4.44,
"learning_rate": 1.9458865185956466e-06,
"loss": 2.9348,
"step": 20570
},
{
"epoch": 4.44,
"learning_rate": 1.932386238757508e-06,
"loss": 2.9324,
"step": 20580
},
{
"epoch": 4.45,
"learning_rate": 1.9202745566832685e-06,
"loss": 2.9191,
"step": 20590
},
{
"epoch": 4.45,
"learning_rate": 1.9068600116360163e-06,
"loss": 2.9239,
"step": 20600
},
{
"epoch": 4.45,
"learning_rate": 1.8934906282404602e-06,
"loss": 2.9317,
"step": 20610
},
{
"epoch": 4.45,
"learning_rate": 1.880166432642172e-06,
"loss": 2.8828,
"step": 20620
},
{
"epoch": 4.45,
"learning_rate": 1.8668874508983581e-06,
"loss": 2.8845,
"step": 20630
},
{
"epoch": 4.46,
"learning_rate": 1.853653708977804e-06,
"loss": 2.9405,
"step": 20640
},
{
"epoch": 4.46,
"learning_rate": 1.8417820426900989e-06,
"loss": 2.9519,
"step": 20650
},
{
"epoch": 4.46,
"learning_rate": 1.8286343276608276e-06,
"loss": 2.9052,
"step": 20660
},
{
"epoch": 4.46,
"learning_rate": 1.815531927263797e-06,
"loss": 2.9356,
"step": 20670
},
{
"epoch": 4.46,
"learning_rate": 1.8024748671224584e-06,
"loss": 2.9447,
"step": 20680
},
{
"epoch": 4.47,
"learning_rate": 1.7894631727715994e-06,
"loss": 2.9093,
"step": 20690
},
{
"epoch": 4.47,
"learning_rate": 1.7764968696572876e-06,
"loss": 2.9615,
"step": 20700
},
{
"epoch": 4.47,
"learning_rate": 1.763575983136817e-06,
"loss": 2.9112,
"step": 20710
},
{
"epoch": 4.47,
"learning_rate": 1.7507005384786806e-06,
"loss": 2.9153,
"step": 20720
},
{
"epoch": 4.48,
"learning_rate": 1.7378705608624867e-06,
"loss": 2.9234,
"step": 20730
},
{
"epoch": 4.48,
"learning_rate": 1.7250860753789316e-06,
"loss": 2.9095,
"step": 20740
},
{
"epoch": 4.48,
"learning_rate": 1.7123471070297465e-06,
"loss": 2.9246,
"step": 20750
},
{
"epoch": 4.48,
"learning_rate": 1.6996536807276476e-06,
"loss": 2.8968,
"step": 20760
},
{
"epoch": 4.48,
"learning_rate": 1.687005821296289e-06,
"loss": 2.8706,
"step": 20770
},
{
"epoch": 4.49,
"learning_rate": 1.6744035534702135e-06,
"loss": 2.9237,
"step": 20780
},
{
"epoch": 4.49,
"learning_rate": 1.6618469018947979e-06,
"loss": 2.8883,
"step": 20790
},
{
"epoch": 4.49,
"learning_rate": 1.649335891126219e-06,
"loss": 2.9856,
"step": 20800
},
{
"epoch": 4.49,
"learning_rate": 1.6368705456313916e-06,
"loss": 2.9466,
"step": 20810
},
{
"epoch": 4.49,
"learning_rate": 1.6244508897879235e-06,
"loss": 2.936,
"step": 20820
},
{
"epoch": 4.5,
"learning_rate": 1.6120769478840753e-06,
"loss": 2.9661,
"step": 20830
},
{
"epoch": 4.5,
"learning_rate": 1.599748744118712e-06,
"loss": 2.9498,
"step": 20840
},
{
"epoch": 4.5,
"learning_rate": 1.5874663026012344e-06,
"loss": 2.9403,
"step": 20850
},
{
"epoch": 4.5,
"learning_rate": 1.5752296473515727e-06,
"loss": 2.9273,
"step": 20860
},
{
"epoch": 4.51,
"learning_rate": 1.563038802300104e-06,
"loss": 2.9342,
"step": 20870
},
{
"epoch": 4.51,
"learning_rate": 1.552106229179498e-06,
"loss": 2.8975,
"step": 20880
},
{
"epoch": 4.51,
"learning_rate": 1.540002489111883e-06,
"loss": 2.9145,
"step": 20890
},
{
"epoch": 4.51,
"learning_rate": 1.5291483489997955e-06,
"loss": 2.8894,
"step": 20900
},
{
"epoch": 4.51,
"learning_rate": 1.5171317993663059e-06,
"loss": 2.8679,
"step": 20910
},
{
"epoch": 4.52,
"learning_rate": 1.505161173548933e-06,
"loss": 2.9202,
"step": 20920
},
{
"epoch": 4.52,
"learning_rate": 1.493236494957806e-06,
"loss": 2.9046,
"step": 20930
},
{
"epoch": 4.52,
"learning_rate": 1.4813577869131874e-06,
"loss": 2.9238,
"step": 20940
},
{
"epoch": 4.52,
"learning_rate": 1.4695250726454418e-06,
"loss": 2.9176,
"step": 20950
},
{
"epoch": 4.53,
"learning_rate": 1.4577383752949896e-06,
"loss": 2.904,
"step": 20960
},
{
"epoch": 4.53,
"learning_rate": 1.4459977179122592e-06,
"loss": 2.893,
"step": 20970
},
{
"epoch": 4.53,
"learning_rate": 1.4343031234576337e-06,
"loss": 2.8934,
"step": 20980
},
{
"epoch": 4.53,
"learning_rate": 1.4226546148014247e-06,
"loss": 2.9175,
"step": 20990
},
{
"epoch": 4.53,
"learning_rate": 1.4110522147238125e-06,
"loss": 2.8787,
"step": 21000
},
{
"epoch": 4.54,
"learning_rate": 1.3994959459148027e-06,
"loss": 2.9106,
"step": 21010
},
{
"epoch": 4.54,
"learning_rate": 1.3879858309741922e-06,
"loss": 2.9021,
"step": 21020
},
{
"epoch": 4.54,
"learning_rate": 1.3765218924115113e-06,
"loss": 2.9293,
"step": 21030
},
{
"epoch": 4.54,
"learning_rate": 1.3651041526459879e-06,
"loss": 2.9421,
"step": 21040
},
{
"epoch": 4.54,
"learning_rate": 1.3537326340065104e-06,
"loss": 2.9577,
"step": 21050
},
{
"epoch": 4.55,
"learning_rate": 1.3424073587315623e-06,
"loss": 2.9374,
"step": 21060
},
{
"epoch": 4.55,
"learning_rate": 1.3322541673680216e-06,
"loss": 2.8901,
"step": 21070
},
{
"epoch": 4.55,
"learning_rate": 1.3210168154287967e-06,
"loss": 2.9682,
"step": 21080
},
{
"epoch": 4.55,
"learning_rate": 1.3109427908386651e-06,
"loss": 2.8987,
"step": 21090
},
{
"epoch": 4.56,
"learning_rate": 1.2997934415688862e-06,
"loss": 2.8876,
"step": 21100
},
{
"epoch": 4.56,
"learning_rate": 1.288690441148771e-06,
"loss": 2.9034,
"step": 21110
},
{
"epoch": 4.56,
"learning_rate": 1.2776338112916953e-06,
"loss": 2.9151,
"step": 21120
},
{
"epoch": 4.56,
"learning_rate": 1.2666235736203374e-06,
"loss": 2.9462,
"step": 21130
},
{
"epoch": 4.56,
"learning_rate": 1.2556597496666572e-06,
"loss": 2.9159,
"step": 21140
},
{
"epoch": 4.57,
"learning_rate": 1.2447423608718451e-06,
"loss": 2.9359,
"step": 21150
},
{
"epoch": 4.57,
"learning_rate": 1.2338714285862807e-06,
"loss": 2.9092,
"step": 21160
},
{
"epoch": 4.57,
"learning_rate": 1.2230469740694888e-06,
"loss": 2.9377,
"step": 21170
},
{
"epoch": 4.57,
"learning_rate": 1.2122690184901086e-06,
"loss": 2.8844,
"step": 21180
},
{
"epoch": 4.57,
"learning_rate": 1.201537582925838e-06,
"loss": 2.8579,
"step": 21190
},
{
"epoch": 4.58,
"learning_rate": 1.190852688363403e-06,
"loss": 2.8986,
"step": 21200
},
{
"epoch": 4.58,
"learning_rate": 1.1802143556985057e-06,
"loss": 2.911,
"step": 21210
},
{
"epoch": 4.58,
"learning_rate": 1.1696226057358012e-06,
"loss": 2.922,
"step": 21220
},
{
"epoch": 4.58,
"learning_rate": 1.1590774591888343e-06,
"loss": 2.9283,
"step": 21230
},
{
"epoch": 4.59,
"learning_rate": 1.1485789366800198e-06,
"loss": 2.9494,
"step": 21240
},
{
"epoch": 4.59,
"learning_rate": 1.138127058740593e-06,
"loss": 2.9426,
"step": 21250
},
{
"epoch": 4.59,
"learning_rate": 1.1277218458105588e-06,
"loss": 2.9226,
"step": 21260
},
{
"epoch": 4.59,
"learning_rate": 1.1173633182386734e-06,
"loss": 2.9349,
"step": 21270
},
{
"epoch": 4.59,
"learning_rate": 1.1070514962823936e-06,
"loss": 2.8987,
"step": 21280
},
{
"epoch": 4.6,
"learning_rate": 1.0978108064922637e-06,
"loss": 2.933,
"step": 21290
},
{
"epoch": 4.6,
"learning_rate": 1.0886079792243154e-06,
"loss": 2.9345,
"step": 21300
},
{
"epoch": 4.6,
"learning_rate": 1.0794430290563818e-06,
"loss": 2.942,
"step": 21310
},
{
"epoch": 4.6,
"learning_rate": 1.0693041925069897e-06,
"loss": 2.9194,
"step": 21320
},
{
"epoch": 4.6,
"learning_rate": 1.0592121555591e-06,
"loss": 2.9193,
"step": 21330
},
{
"epoch": 4.61,
"learning_rate": 1.0491669379490088e-06,
"loss": 2.9431,
"step": 21340
},
{
"epoch": 4.61,
"learning_rate": 1.039168559321449e-06,
"loss": 2.9082,
"step": 21350
},
{
"epoch": 4.61,
"learning_rate": 1.0292170392295575e-06,
"loss": 2.9273,
"step": 21360
},
{
"epoch": 4.61,
"learning_rate": 1.020300751281672e-06,
"loss": 2.9,
"step": 21370
},
{
"epoch": 4.62,
"learning_rate": 1.0104383159482062e-06,
"loss": 2.9254,
"step": 21380
},
{
"epoch": 4.62,
"learning_rate": 1.0006227953361535e-06,
"loss": 2.9413,
"step": 21390
},
{
"epoch": 4.62,
"learning_rate": 9.908542086410428e-07,
"loss": 2.8917,
"step": 21400
},
{
"epoch": 4.62,
"learning_rate": 9.811325749666283e-07,
"loss": 2.9364,
"step": 21410
},
{
"epoch": 4.62,
"learning_rate": 9.714579133248274e-07,
"loss": 2.9277,
"step": 21420
},
{
"epoch": 4.63,
"learning_rate": 9.618302426357085e-07,
"loss": 2.9372,
"step": 21430
},
{
"epoch": 4.63,
"learning_rate": 9.52249581727438e-07,
"loss": 2.8779,
"step": 21440
},
{
"epoch": 4.63,
"learning_rate": 9.427159493362481e-07,
"loss": 2.9611,
"step": 21450
},
{
"epoch": 4.63,
"learning_rate": 9.332293641064055e-07,
"loss": 2.9694,
"step": 21460
},
{
"epoch": 4.64,
"learning_rate": 9.237898445901672e-07,
"loss": 2.8813,
"step": 21470
},
{
"epoch": 4.64,
"learning_rate": 9.143974092477386e-07,
"loss": 2.9195,
"step": 21480
},
{
"epoch": 4.64,
"learning_rate": 9.050520764472658e-07,
"loss": 2.9095,
"step": 21490
},
{
"epoch": 4.64,
"learning_rate": 8.957538644647601e-07,
"loss": 2.9037,
"step": 21500
},
{
"epoch": 4.64,
"learning_rate": 8.865027914840923e-07,
"loss": 2.9367,
"step": 21510
},
{
"epoch": 4.65,
"learning_rate": 8.772988755969436e-07,
"loss": 2.9214,
"step": 21520
},
{
"epoch": 4.65,
"learning_rate": 8.681421348027713e-07,
"loss": 2.9252,
"step": 21530
},
{
"epoch": 4.65,
"learning_rate": 8.590325870087817e-07,
"loss": 2.9321,
"step": 21540
},
{
"epoch": 4.65,
"learning_rate": 8.499702500298829e-07,
"loss": 2.9334,
"step": 21550
},
{
"epoch": 4.65,
"learning_rate": 8.409551415886591e-07,
"loss": 2.9184,
"step": 21560
},
{
"epoch": 4.66,
"learning_rate": 8.319872793153355e-07,
"loss": 2.8939,
"step": 21570
},
{
"epoch": 4.66,
"learning_rate": 8.230666807477333e-07,
"loss": 2.9228,
"step": 21580
},
{
"epoch": 4.66,
"learning_rate": 8.150785669258116e-07,
"loss": 2.9541,
"step": 21590
},
{
"epoch": 4.66,
"learning_rate": 8.071287634911495e-07,
"loss": 2.9292,
"step": 21600
},
{
"epoch": 4.67,
"learning_rate": 7.983405958139095e-07,
"loss": 2.9029,
"step": 21610
},
{
"epoch": 4.67,
"learning_rate": 7.895997576428782e-07,
"loss": 2.9075,
"step": 21620
},
{
"epoch": 4.67,
"learning_rate": 7.809062660719041e-07,
"loss": 2.8983,
"step": 21630
},
{
"epoch": 4.67,
"learning_rate": 7.722601381022488e-07,
"loss": 2.8929,
"step": 21640
},
{
"epoch": 4.67,
"learning_rate": 7.636613906425449e-07,
"loss": 2.9307,
"step": 21650
},
{
"epoch": 4.68,
"learning_rate": 7.551100405087635e-07,
"loss": 2.9521,
"step": 21660
},
{
"epoch": 4.68,
"learning_rate": 7.466061044241885e-07,
"loss": 2.9046,
"step": 21670
},
{
"epoch": 4.68,
"learning_rate": 7.381495990193748e-07,
"loss": 2.9275,
"step": 21680
},
{
"epoch": 4.68,
"learning_rate": 7.297405408321301e-07,
"loss": 2.8683,
"step": 21690
},
{
"epoch": 4.68,
"learning_rate": 7.213789463074549e-07,
"loss": 2.923,
"step": 21700
},
{
"epoch": 4.69,
"learning_rate": 7.130648317975497e-07,
"loss": 2.9581,
"step": 21710
},
{
"epoch": 4.69,
"learning_rate": 7.064477367296541e-07,
"loss": 2.9294,
"step": 21720
},
{
"epoch": 4.69,
"learning_rate": 6.982191271574429e-07,
"loss": 2.9194,
"step": 21730
},
{
"epoch": 4.69,
"learning_rate": 6.900380428920466e-07,
"loss": 2.9258,
"step": 21740
},
{
"epoch": 4.7,
"learning_rate": 6.81904499932648e-07,
"loss": 2.8892,
"step": 21750
},
{
"epoch": 4.7,
"learning_rate": 6.738185141854508e-07,
"loss": 2.9712,
"step": 21760
},
{
"epoch": 4.7,
"learning_rate": 6.657801014636589e-07,
"loss": 2.9217,
"step": 21770
},
{
"epoch": 4.7,
"learning_rate": 6.577892774874389e-07,
"loss": 2.906,
"step": 21780
},
{
"epoch": 4.7,
"learning_rate": 6.49846057883885e-07,
"loss": 2.9009,
"step": 21790
},
{
"epoch": 4.71,
"learning_rate": 6.419504581870073e-07,
"loss": 2.9319,
"step": 21800
},
{
"epoch": 4.71,
"learning_rate": 6.341024938376794e-07,
"loss": 2.9143,
"step": 21810
},
{
"epoch": 4.71,
"learning_rate": 6.263021801836216e-07,
"loss": 2.9377,
"step": 21820
},
{
"epoch": 4.71,
"learning_rate": 6.185495324793649e-07,
"loss": 2.9046,
"step": 21830
},
{
"epoch": 4.72,
"learning_rate": 6.108445658862233e-07,
"loss": 2.9643,
"step": 21840
},
{
"epoch": 4.72,
"learning_rate": 6.031872954722684e-07,
"loss": 2.8773,
"step": 21850
},
{
"epoch": 4.72,
"learning_rate": 5.955777362122911e-07,
"loss": 2.9067,
"step": 21860
},
{
"epoch": 4.72,
"learning_rate": 5.880159029877819e-07,
"loss": 2.8597,
"step": 21870
},
{
"epoch": 4.72,
"learning_rate": 5.805018105868865e-07,
"loss": 2.9352,
"step": 21880
},
{
"epoch": 4.73,
"learning_rate": 5.730354737043975e-07,
"loss": 2.9014,
"step": 21890
},
{
"epoch": 4.73,
"learning_rate": 5.656169069417155e-07,
"loss": 2.8968,
"step": 21900
},
{
"epoch": 4.73,
"learning_rate": 5.582461248068133e-07,
"loss": 2.9007,
"step": 21910
},
{
"epoch": 4.73,
"learning_rate": 5.509231417142186e-07,
"loss": 2.9266,
"step": 21920
},
{
"epoch": 4.73,
"learning_rate": 5.450991801774913e-07,
"loss": 2.9315,
"step": 21930
},
{
"epoch": 4.74,
"learning_rate": 5.3786227138726e-07,
"loss": 2.8969,
"step": 21940
},
{
"epoch": 4.74,
"learning_rate": 5.306732015026266e-07,
"loss": 2.9191,
"step": 21950
},
{
"epoch": 4.74,
"learning_rate": 5.235319845827508e-07,
"loss": 2.9034,
"step": 21960
},
{
"epoch": 4.74,
"learning_rate": 5.164386345932193e-07,
"loss": 2.9417,
"step": 21970
},
{
"epoch": 4.75,
"learning_rate": 5.093931654060025e-07,
"loss": 2.888,
"step": 21980
},
{
"epoch": 4.75,
"learning_rate": 5.030931926132215e-07,
"loss": 2.8986,
"step": 21990
},
{
"epoch": 4.75,
"learning_rate": 4.961387348323049e-07,
"loss": 2.911,
"step": 22000
},
{
"epoch": 4.75,
"learning_rate": 4.892321975528108e-07,
"loss": 2.8962,
"step": 22010
},
{
"epoch": 4.75,
"learning_rate": 4.823735942813739e-07,
"loss": 2.9419,
"step": 22020
},
{
"epoch": 4.76,
"learning_rate": 4.755629384308902e-07,
"loss": 2.9051,
"step": 22030
},
{
"epoch": 4.76,
"learning_rate": 4.68800243320483e-07,
"loss": 2.9703,
"step": 22040
},
{
"epoch": 4.76,
"learning_rate": 4.6208552217548716e-07,
"loss": 2.969,
"step": 22050
},
{
"epoch": 4.76,
"learning_rate": 4.554187881274208e-07,
"loss": 2.9334,
"step": 22060
},
{
"epoch": 4.76,
"learning_rate": 4.488000542139492e-07,
"loss": 2.8883,
"step": 22070
},
{
"epoch": 4.77,
"learning_rate": 4.422293333788713e-07,
"loss": 2.9425,
"step": 22080
},
{
"epoch": 4.77,
"learning_rate": 4.357066384720915e-07,
"loss": 2.9122,
"step": 22090
},
{
"epoch": 4.77,
"learning_rate": 4.292319822495894e-07,
"loss": 2.9316,
"step": 22100
},
{
"epoch": 4.77,
"learning_rate": 4.2280537737340587e-07,
"loss": 2.917,
"step": 22110
},
{
"epoch": 4.78,
"learning_rate": 4.1642683641159866e-07,
"loss": 2.9661,
"step": 22120
},
{
"epoch": 4.78,
"learning_rate": 4.1009637183824224e-07,
"loss": 2.9271,
"step": 22130
},
{
"epoch": 4.78,
"learning_rate": 4.038139960333892e-07,
"loss": 2.9702,
"step": 22140
},
{
"epoch": 4.78,
"learning_rate": 3.975797212830451e-07,
"loss": 2.9143,
"step": 22150
},
{
"epoch": 4.78,
"learning_rate": 3.913935597791435e-07,
"loss": 2.9215,
"step": 22160
},
{
"epoch": 4.79,
"learning_rate": 3.8525552361953764e-07,
"loss": 2.9807,
"step": 22170
},
{
"epoch": 4.79,
"learning_rate": 3.7916562480795337e-07,
"loss": 2.8708,
"step": 22180
},
{
"epoch": 4.79,
"learning_rate": 3.7432837265567456e-07,
"loss": 2.8824,
"step": 22190
},
{
"epoch": 4.79,
"learning_rate": 3.68325151019433e-07,
"loss": 2.8987,
"step": 22200
},
{
"epoch": 4.79,
"learning_rate": 3.6237009984076974e-07,
"loss": 2.914,
"step": 22210
},
{
"epoch": 4.8,
"learning_rate": 3.564632307655663e-07,
"loss": 2.9721,
"step": 22220
},
{
"epoch": 4.8,
"learning_rate": 3.5060455534547677e-07,
"loss": 2.9599,
"step": 22230
},
{
"epoch": 4.8,
"learning_rate": 3.447940850379083e-07,
"loss": 2.9596,
"step": 22240
},
{
"epoch": 4.8,
"learning_rate": 3.396058865258789e-07,
"loss": 2.9137,
"step": 22250
},
{
"epoch": 4.81,
"learning_rate": 3.338870371595942e-07,
"loss": 2.9686,
"step": 22260
},
{
"epoch": 4.81,
"learning_rate": 3.287813157371117e-07,
"loss": 2.9201,
"step": 22270
},
{
"epoch": 4.81,
"learning_rate": 3.2315412768618157e-07,
"loss": 2.896,
"step": 22280
},
{
"epoch": 4.81,
"learning_rate": 3.175751984306669e-07,
"loss": 2.8786,
"step": 22290
},
{
"epoch": 4.81,
"learning_rate": 3.120445388808929e-07,
"loss": 2.9114,
"step": 22300
},
{
"epoch": 4.82,
"learning_rate": 3.0656215985278844e-07,
"loss": 2.9083,
"step": 22310
},
{
"epoch": 4.82,
"learning_rate": 3.0112807206785776e-07,
"loss": 2.9081,
"step": 22320
},
{
"epoch": 4.82,
"learning_rate": 2.9574228615317226e-07,
"loss": 2.9451,
"step": 22330
},
{
"epoch": 4.82,
"learning_rate": 2.9040481264134e-07,
"loss": 2.936,
"step": 22340
},
{
"epoch": 4.83,
"learning_rate": 2.8511566197048645e-07,
"loss": 2.8983,
"step": 22350
},
{
"epoch": 4.83,
"learning_rate": 2.798748444842375e-07,
"loss": 2.9291,
"step": 22360
},
{
"epoch": 4.83,
"learning_rate": 2.746823704316948e-07,
"loss": 2.9234,
"step": 22370
},
{
"epoch": 4.83,
"learning_rate": 2.695382499674215e-07,
"loss": 2.9015,
"step": 22380
},
{
"epoch": 4.83,
"learning_rate": 2.64442493151415e-07,
"loss": 2.9238,
"step": 22390
},
{
"epoch": 4.84,
"learning_rate": 2.5939510994909544e-07,
"loss": 2.9128,
"step": 22400
},
{
"epoch": 4.84,
"learning_rate": 2.5439611023128084e-07,
"loss": 2.908,
"step": 22410
},
{
"epoch": 4.84,
"learning_rate": 2.4944550377416497e-07,
"loss": 2.9197,
"step": 22420
},
{
"epoch": 4.84,
"learning_rate": 2.4454330025930616e-07,
"loss": 2.9077,
"step": 22430
},
{
"epoch": 4.84,
"learning_rate": 2.396895092736079e-07,
"loss": 2.9352,
"step": 22440
},
{
"epoch": 4.85,
"learning_rate": 2.348841403092855e-07,
"loss": 2.9243,
"step": 22450
},
{
"epoch": 4.85,
"learning_rate": 2.30127202763869e-07,
"loss": 2.8752,
"step": 22460
},
{
"epoch": 4.85,
"learning_rate": 2.2541870594017246e-07,
"loss": 2.9104,
"step": 22470
},
{
"epoch": 4.85,
"learning_rate": 2.2075865904627458e-07,
"loss": 2.9068,
"step": 22480
},
{
"epoch": 4.86,
"learning_rate": 2.161470711955077e-07,
"loss": 2.8987,
"step": 22490
},
{
"epoch": 4.86,
"learning_rate": 2.1158395140643272e-07,
"loss": 2.9211,
"step": 22500
},
{
"epoch": 4.86,
"learning_rate": 2.0706930860282524e-07,
"loss": 2.9136,
"step": 22510
},
{
"epoch": 4.86,
"learning_rate": 2.0260315161366173e-07,
"loss": 2.9604,
"step": 22520
},
{
"epoch": 4.86,
"learning_rate": 1.9818548917309444e-07,
"loss": 2.8911,
"step": 22530
},
{
"epoch": 4.87,
"learning_rate": 1.9381632992044042e-07,
"loss": 2.9163,
"step": 22540
},
{
"epoch": 4.87,
"learning_rate": 1.8949568240015647e-07,
"loss": 2.9338,
"step": 22550
},
{
"epoch": 4.87,
"learning_rate": 1.852235550618392e-07,
"loss": 2.9272,
"step": 22560
},
{
"epoch": 4.87,
"learning_rate": 1.8099995626018885e-07,
"loss": 2.8668,
"step": 22570
},
{
"epoch": 4.87,
"learning_rate": 1.7765602331904218e-07,
"loss": 2.9212,
"step": 22580
},
{
"epoch": 4.88,
"learning_rate": 1.7351979663429997e-07,
"loss": 2.9157,
"step": 22590
},
{
"epoch": 4.88,
"learning_rate": 1.6943212137445486e-07,
"loss": 2.9044,
"step": 22600
},
{
"epoch": 4.88,
"learning_rate": 1.6539300553348737e-07,
"loss": 2.9358,
"step": 22610
},
{
"epoch": 4.88,
"learning_rate": 1.61402457010415e-07,
"loss": 2.9139,
"step": 22620
},
{
"epoch": 4.89,
"learning_rate": 1.5746048360928135e-07,
"loss": 2.8807,
"step": 22630
},
{
"epoch": 4.89,
"learning_rate": 1.5356709303912809e-07,
"loss": 2.9356,
"step": 22640
},
{
"epoch": 4.89,
"learning_rate": 1.4972229291398687e-07,
"loss": 2.9193,
"step": 22650
},
{
"epoch": 4.89,
"learning_rate": 1.4592609075286812e-07,
"loss": 2.9266,
"step": 22660
},
{
"epoch": 4.89,
"learning_rate": 1.421784939797416e-07,
"loss": 2.9354,
"step": 22670
},
{
"epoch": 4.9,
"learning_rate": 1.3884722054997302e-07,
"loss": 2.8989,
"step": 22680
},
{
"epoch": 4.9,
"learning_rate": 1.351919941266183e-07,
"loss": 2.9092,
"step": 22690
},
{
"epoch": 4.9,
"learning_rate": 1.3158539408318348e-07,
"loss": 2.9448,
"step": 22700
},
{
"epoch": 4.9,
"learning_rate": 1.2802742747284603e-07,
"loss": 2.9296,
"step": 22710
},
{
"epoch": 4.91,
"learning_rate": 1.245181012536706e-07,
"loss": 2.9456,
"step": 22720
},
{
"epoch": 4.91,
"learning_rate": 1.2105742228861183e-07,
"loss": 2.9007,
"step": 22730
},
{
"epoch": 4.91,
"learning_rate": 1.1764539734547275e-07,
"loss": 2.911,
"step": 22740
},
{
"epoch": 4.91,
"learning_rate": 1.1428203309691299e-07,
"loss": 2.918,
"step": 22750
},
{
"epoch": 4.91,
"learning_rate": 1.1096733612043497e-07,
"loss": 2.9083,
"step": 22760
},
{
"epoch": 4.92,
"learning_rate": 1.0770131289836171e-07,
"loss": 2.8594,
"step": 22770
},
{
"epoch": 4.92,
"learning_rate": 1.0448396981782294e-07,
"loss": 2.8942,
"step": 22780
},
{
"epoch": 4.92,
"learning_rate": 1.0162998776860611e-07,
"loss": 2.942,
"step": 22790
},
{
"epoch": 4.92,
"learning_rate": 9.850515421257755e-08,
"loss": 2.9029,
"step": 22800
},
{
"epoch": 4.92,
"learning_rate": 9.542901878237809e-08,
"loss": 2.8965,
"step": 22810
},
{
"epoch": 4.93,
"learning_rate": 9.240158749378725e-08,
"loss": 2.9139,
"step": 22820
},
{
"epoch": 4.93,
"learning_rate": 8.942286626734686e-08,
"loss": 2.9332,
"step": 22830
},
{
"epoch": 4.93,
"learning_rate": 8.649286092833331e-08,
"loss": 2.9405,
"step": 22840
},
{
"epoch": 4.93,
"learning_rate": 8.36115772067575e-08,
"loss": 2.891,
"step": 22850
},
{
"epoch": 4.94,
"learning_rate": 8.077902073734544e-08,
"loss": 2.9606,
"step": 22860
},
{
"epoch": 4.94,
"learning_rate": 7.827138629578345e-08,
"loss": 2.8665,
"step": 22870
},
{
"epoch": 4.94,
"learning_rate": 7.553142678785319e-08,
"loss": 2.9215,
"step": 22880
},
{
"epoch": 4.94,
"learning_rate": 7.284021033387157e-08,
"loss": 2.933,
"step": 22890
},
{
"epoch": 4.94,
"learning_rate": 7.019774219687025e-08,
"loss": 2.9721,
"step": 22900
},
{
"epoch": 4.95,
"learning_rate": 6.760402754453499e-08,
"loss": 2.9288,
"step": 22910
},
{
"epoch": 4.95,
"learning_rate": 6.505907144921942e-08,
"loss": 2.9275,
"step": 22920
},
{
"epoch": 4.95,
"learning_rate": 6.256287888791457e-08,
"loss": 2.8721,
"step": 22930
},
{
"epoch": 4.95,
"learning_rate": 6.011545474225166e-08,
"loss": 2.9329,
"step": 22940
},
{
"epoch": 4.95,
"learning_rate": 5.771680379848543e-08,
"loss": 2.9219,
"step": 22950
},
{
"epoch": 4.96,
"learning_rate": 5.536693074749688e-08,
"loss": 2.9175,
"step": 22960
},
{
"epoch": 4.96,
"learning_rate": 5.3065840184762796e-08,
"loss": 2.9021,
"step": 22970
},
{
"epoch": 4.96,
"learning_rate": 5.081353661037236e-08,
"loss": 2.9177,
"step": 22980
},
{
"epoch": 4.96,
"learning_rate": 4.861002442899665e-08,
"loss": 2.9304,
"step": 22990
},
{
"epoch": 4.97,
"learning_rate": 4.645530794989139e-08,
"loss": 2.947,
"step": 23000
},
{
"epoch": 4.97,
"learning_rate": 4.4349391386885877e-08,
"loss": 2.8928,
"step": 23010
},
{
"epoch": 4.97,
"learning_rate": 4.2495793814276154e-08,
"loss": 2.9327,
"step": 23020
},
{
"epoch": 4.97,
"learning_rate": 4.048260835915474e-08,
"loss": 2.9141,
"step": 23030
},
{
"epoch": 4.97,
"learning_rate": 3.851823450053138e-08,
"loss": 2.9006,
"step": 23040
},
{
"epoch": 4.98,
"learning_rate": 3.660267607999146e-08,
"loss": 2.8819,
"step": 23050
},
{
"epoch": 4.98,
"learning_rate": 3.4920413799213736e-08,
"loss": 2.9409,
"step": 23060
},
{
"epoch": 4.98,
"learning_rate": 3.309761495270125e-08,
"loss": 2.9731,
"step": 23070
},
{
"epoch": 4.98,
"learning_rate": 3.132364214500083e-08,
"loss": 2.9644,
"step": 23080
},
{
"epoch": 4.98,
"learning_rate": 2.9598498845351264e-08,
"loss": 2.8922,
"step": 23090
},
{
"epoch": 4.99,
"learning_rate": 2.7922188427492725e-08,
"loss": 2.9001,
"step": 23100
},
{
"epoch": 4.99,
"learning_rate": 2.629471416967233e-08,
"loss": 2.9262,
"step": 23110
},
{
"epoch": 4.99,
"learning_rate": 2.4716079254624715e-08,
"loss": 2.9005,
"step": 23120
},
{
"epoch": 4.99,
"learning_rate": 2.3186286769580346e-08,
"loss": 2.9714,
"step": 23130
},
{
"epoch": 5.0,
"learning_rate": 2.1705339706248885e-08,
"loss": 2.9404,
"step": 23140
},
{
"epoch": 5.0,
"learning_rate": 2.0273240960810846e-08,
"loss": 2.9227,
"step": 23150
},
{
"epoch": 5.0,
"learning_rate": 1.8889993333925938e-08,
"loss": 2.8984,
"step": 23160
},
{
"epoch": 5.0,
"eval_loss": 2.9193811416625977,
"eval_runtime": 267.5732,
"eval_samples_per_second": 553.942,
"eval_steps_per_second": 17.311,
"step": 23160
},
{
"epoch": 5.0,
"step": 23160,
"total_flos": 9.509350727546307e+18,
"train_loss": 3.0286704001649056,
"train_runtime": 16901.5253,
"train_samples_per_second": 175.391,
"train_steps_per_second": 1.37
}
],
"max_steps": 23160,
"num_train_epochs": 5,
"total_flos": 9.509350727546307e+18,
"trial_name": null,
"trial_params": null
}