finetuned_models/my_belle_model/trainer_state.json

2420 lines
48 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.981238273921202,
"global_step": 3990,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.999922507133964e-05,
"loss": 2.4224,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 4.999690033339971e-05,
"loss": 2.3638,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 4.9993025930300686e-05,
"loss": 2.3235,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.998760210223347e-05,
"loss": 2.2665,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.998062918544442e-05,
"loss": 2.2922,
"step": 50
},
{
"epoch": 0.15,
"learning_rate": 4.99721076122146e-05,
"loss": 2.2615,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.996203791083291e-05,
"loss": 2.2748,
"step": 70
},
{
"epoch": 0.2,
"learning_rate": 4.9950420705563365e-05,
"loss": 2.265,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.9937256716606394e-05,
"loss": 2.291,
"step": 90
},
{
"epoch": 0.25,
"learning_rate": 4.992254676005419e-05,
"loss": 2.2525,
"step": 100
},
{
"epoch": 0.28,
"learning_rate": 4.990629174784009e-05,
"loss": 2.2568,
"step": 110
},
{
"epoch": 0.3,
"learning_rate": 4.9888492687682096e-05,
"loss": 2.1978,
"step": 120
},
{
"epoch": 0.33,
"learning_rate": 4.9869150683020335e-05,
"loss": 2.2498,
"step": 130
},
{
"epoch": 0.35,
"learning_rate": 4.984826693294874e-05,
"loss": 2.2394,
"step": 140
},
{
"epoch": 0.38,
"learning_rate": 4.982584273214061e-05,
"loss": 2.2257,
"step": 150
},
{
"epoch": 0.4,
"learning_rate": 4.98018794707684e-05,
"loss": 2.1823,
"step": 160
},
{
"epoch": 0.43,
"learning_rate": 4.977637863441759e-05,
"loss": 2.2213,
"step": 170
},
{
"epoch": 0.45,
"learning_rate": 4.9749341803994465e-05,
"loss": 2.232,
"step": 180
},
{
"epoch": 0.48,
"learning_rate": 4.972077065562821e-05,
"loss": 2.2008,
"step": 190
},
{
"epoch": 0.5,
"learning_rate": 4.969066696056699e-05,
"loss": 2.2421,
"step": 200
},
{
"epoch": 0.53,
"learning_rate": 4.965903258506806e-05,
"loss": 2.1941,
"step": 210
},
{
"epoch": 0.55,
"learning_rate": 4.9625869490282176e-05,
"loss": 2.1979,
"step": 220
},
{
"epoch": 0.58,
"learning_rate": 4.959117973213194e-05,
"loss": 2.182,
"step": 230
},
{
"epoch": 0.6,
"learning_rate": 4.955496546118439e-05,
"loss": 2.2056,
"step": 240
},
{
"epoch": 0.63,
"learning_rate": 4.951722892251762e-05,
"loss": 2.2046,
"step": 250
},
{
"epoch": 0.65,
"learning_rate": 4.947797245558168e-05,
"loss": 2.2378,
"step": 260
},
{
"epoch": 0.68,
"learning_rate": 4.9437198494053464e-05,
"loss": 2.2175,
"step": 270
},
{
"epoch": 0.7,
"learning_rate": 4.9394909565685894e-05,
"loss": 2.2233,
"step": 280
},
{
"epoch": 0.73,
"learning_rate": 4.935110829215117e-05,
"loss": 2.2411,
"step": 290
},
{
"epoch": 0.75,
"learning_rate": 4.9305797388878264e-05,
"loss": 2.2573,
"step": 300
},
{
"epoch": 0.78,
"learning_rate": 4.92589796648846e-05,
"loss": 2.2064,
"step": 310
},
{
"epoch": 0.8,
"learning_rate": 4.921065802260185e-05,
"loss": 2.2152,
"step": 320
},
{
"epoch": 0.83,
"learning_rate": 4.916083545769607e-05,
"loss": 2.2198,
"step": 330
},
{
"epoch": 0.85,
"learning_rate": 4.9109515058881925e-05,
"loss": 2.224,
"step": 340
},
{
"epoch": 0.88,
"learning_rate": 4.905670000773126e-05,
"loss": 2.2385,
"step": 350
},
{
"epoch": 0.9,
"learning_rate": 4.9002393578475816e-05,
"loss": 2.1891,
"step": 360
},
{
"epoch": 0.93,
"learning_rate": 4.894659913780427e-05,
"loss": 2.2387,
"step": 370
},
{
"epoch": 0.95,
"learning_rate": 4.888932014465352e-05,
"loss": 2.1901,
"step": 380
},
{
"epoch": 0.98,
"learning_rate": 4.883056014999423e-05,
"loss": 2.1921,
"step": 390
},
{
"epoch": 1.0,
"learning_rate": 4.877032279661073e-05,
"loss": 2.2066,
"step": 400
},
{
"epoch": 1.03,
"learning_rate": 4.870861181887514e-05,
"loss": 2.1981,
"step": 410
},
{
"epoch": 1.05,
"learning_rate": 4.864543104251587e-05,
"loss": 2.1755,
"step": 420
},
{
"epoch": 1.08,
"learning_rate": 4.8580784384380486e-05,
"loss": 2.1689,
"step": 430
},
{
"epoch": 1.1,
"learning_rate": 4.8528014313136675e-05,
"loss": 2.1933,
"step": 440
},
{
"epoch": 1.13,
"learning_rate": 4.8460739228178806e-05,
"loss": 2.1953,
"step": 450
},
{
"epoch": 1.15,
"learning_rate": 4.839200971128324e-05,
"loss": 2.2033,
"step": 460
},
{
"epoch": 1.18,
"learning_rate": 4.832183002328776e-05,
"loss": 2.1631,
"step": 470
},
{
"epoch": 1.2,
"learning_rate": 4.8250204514932517e-05,
"loss": 2.2011,
"step": 480
},
{
"epoch": 1.23,
"learning_rate": 4.817713762659024e-05,
"loss": 2.1802,
"step": 490
},
{
"epoch": 1.25,
"learning_rate": 4.810263388799101e-05,
"loss": 2.2153,
"step": 500
},
{
"epoch": 1.28,
"learning_rate": 4.80266979179414e-05,
"loss": 2.1755,
"step": 510
},
{
"epoch": 1.3,
"learning_rate": 4.7949334424038176e-05,
"loss": 2.1876,
"step": 520
},
{
"epoch": 1.33,
"learning_rate": 4.787054820237644e-05,
"loss": 2.2001,
"step": 530
},
{
"epoch": 1.35,
"learning_rate": 4.77903441372523e-05,
"loss": 2.1986,
"step": 540
},
{
"epoch": 1.38,
"learning_rate": 4.771695233018754e-05,
"loss": 2.1964,
"step": 550
},
{
"epoch": 1.4,
"learning_rate": 4.764241978167314e-05,
"loss": 2.2122,
"step": 560
},
{
"epoch": 1.43,
"learning_rate": 4.755827248631491e-05,
"loss": 2.1796,
"step": 570
},
{
"epoch": 1.45,
"learning_rate": 4.747272670680646e-05,
"loss": 2.1722,
"step": 580
},
{
"epoch": 1.48,
"learning_rate": 4.738578774649787e-05,
"loss": 2.1656,
"step": 590
},
{
"epoch": 1.5,
"learning_rate": 4.729746099510852e-05,
"loss": 2.1352,
"step": 600
},
{
"epoch": 1.53,
"learning_rate": 4.720775192839289e-05,
"loss": 2.1198,
"step": 610
},
{
"epoch": 1.55,
"learning_rate": 4.711666610780115e-05,
"loss": 2.2026,
"step": 620
},
{
"epoch": 1.58,
"learning_rate": 4.702420918013432e-05,
"loss": 2.1542,
"step": 630
},
{
"epoch": 1.6,
"learning_rate": 4.693038687719424e-05,
"loss": 2.138,
"step": 640
},
{
"epoch": 1.63,
"learning_rate": 4.6835205015428246e-05,
"loss": 2.2104,
"step": 650
},
{
"epoch": 1.65,
"learning_rate": 4.675808460613018e-05,
"loss": 2.2081,
"step": 660
},
{
"epoch": 1.68,
"learning_rate": 4.666047046472874e-05,
"loss": 2.1423,
"step": 670
},
{
"epoch": 1.7,
"learning_rate": 4.656151349777851e-05,
"loss": 2.1888,
"step": 680
},
{
"epoch": 1.73,
"learning_rate": 4.6461219840046654e-05,
"loss": 2.1702,
"step": 690
},
{
"epoch": 1.75,
"learning_rate": 4.635959570916757e-05,
"loss": 2.1713,
"step": 700
},
{
"epoch": 1.78,
"learning_rate": 4.625664740525738e-05,
"loss": 2.1861,
"step": 710
},
{
"epoch": 1.8,
"learning_rate": 4.6152381310523387e-05,
"loss": 2.157,
"step": 720
},
{
"epoch": 1.83,
"learning_rate": 4.60468038888684e-05,
"loss": 2.181,
"step": 730
},
{
"epoch": 1.85,
"learning_rate": 4.593992168549001e-05,
"loss": 2.1946,
"step": 740
},
{
"epoch": 1.88,
"learning_rate": 4.583174132647484e-05,
"loss": 2.1926,
"step": 750
},
{
"epoch": 1.9,
"learning_rate": 4.572226951838773e-05,
"loss": 2.2523,
"step": 760
},
{
"epoch": 1.93,
"learning_rate": 4.5622646309652794e-05,
"loss": 2.159,
"step": 770
},
{
"epoch": 1.95,
"learning_rate": 4.5533222818030376e-05,
"loss": 2.19,
"step": 780
},
{
"epoch": 1.98,
"learning_rate": 4.542029822186779e-05,
"loss": 2.1623,
"step": 790
},
{
"epoch": 2.0,
"learning_rate": 4.5306107683757584e-05,
"loss": 2.152,
"step": 800
},
{
"epoch": 2.03,
"learning_rate": 4.519065828286142e-05,
"loss": 2.1926,
"step": 810
},
{
"epoch": 2.05,
"learning_rate": 4.507395717638325e-05,
"loss": 2.1226,
"step": 820
},
{
"epoch": 2.08,
"learning_rate": 4.495601159912566e-05,
"loss": 2.1416,
"step": 830
},
{
"epoch": 2.1,
"learning_rate": 4.483682886304129e-05,
"loss": 2.1558,
"step": 840
},
{
"epoch": 2.13,
"learning_rate": 4.4716416356779576e-05,
"loss": 2.1399,
"step": 850
},
{
"epoch": 2.15,
"learning_rate": 4.4594781545228715e-05,
"loss": 2.1637,
"step": 860
},
{
"epoch": 2.18,
"learning_rate": 4.4471931969052816e-05,
"loss": 2.1307,
"step": 870
},
{
"epoch": 2.2,
"learning_rate": 4.434787524422448e-05,
"loss": 2.159,
"step": 880
},
{
"epoch": 2.23,
"learning_rate": 4.422261906155263e-05,
"loss": 2.1421,
"step": 890
},
{
"epoch": 2.25,
"learning_rate": 4.409617118620574e-05,
"loss": 2.1185,
"step": 900
},
{
"epoch": 2.28,
"learning_rate": 4.396853945723043e-05,
"loss": 2.1477,
"step": 910
},
{
"epoch": 2.3,
"learning_rate": 4.385266524442241e-05,
"loss": 2.0955,
"step": 920
},
{
"epoch": 2.33,
"learning_rate": 4.372280605258533e-05,
"loss": 2.1274,
"step": 930
},
{
"epoch": 2.35,
"learning_rate": 4.3591786153627247e-05,
"loss": 2.1428,
"step": 940
},
{
"epoch": 2.38,
"learning_rate": 4.345961367003414e-05,
"loss": 2.1694,
"step": 950
},
{
"epoch": 2.4,
"learning_rate": 4.332629679574566e-05,
"loss": 2.1898,
"step": 960
},
{
"epoch": 2.43,
"learning_rate": 4.319184379564716e-05,
"loss": 2.1756,
"step": 970
},
{
"epoch": 2.45,
"learning_rate": 4.305626300505728e-05,
"loss": 2.1476,
"step": 980
},
{
"epoch": 2.48,
"learning_rate": 4.2919562829211283e-05,
"loss": 2.0978,
"step": 990
},
{
"epoch": 2.5,
"learning_rate": 4.2795582599412757e-05,
"loss": 2.101,
"step": 1000
},
{
"epoch": 2.53,
"learning_rate": 4.265677899611984e-05,
"loss": 2.1342,
"step": 1010
},
{
"epoch": 2.55,
"learning_rate": 4.2516880773299425e-05,
"loss": 2.1712,
"step": 1020
},
{
"epoch": 2.58,
"learning_rate": 4.23758966038429e-05,
"loss": 2.1545,
"step": 1030
},
{
"epoch": 2.6,
"learning_rate": 4.223383522796415e-05,
"loss": 2.187,
"step": 1040
},
{
"epoch": 2.63,
"learning_rate": 4.209070545265771e-05,
"loss": 2.125,
"step": 1050
},
{
"epoch": 2.65,
"learning_rate": 4.1946516151152785e-05,
"loss": 2.1419,
"step": 1060
},
{
"epoch": 2.68,
"learning_rate": 4.1801276262363146e-05,
"loss": 2.1932,
"step": 1070
},
{
"epoch": 2.7,
"learning_rate": 4.1654994790333e-05,
"loss": 2.1424,
"step": 1080
},
{
"epoch": 2.73,
"learning_rate": 4.150768080367876e-05,
"loss": 2.172,
"step": 1090
},
{
"epoch": 2.75,
"learning_rate": 4.135934343502685e-05,
"loss": 2.1384,
"step": 1100
},
{
"epoch": 2.78,
"learning_rate": 4.120999188044754e-05,
"loss": 2.1569,
"step": 1110
},
{
"epoch": 2.8,
"learning_rate": 4.1059635398884835e-05,
"loss": 2.1748,
"step": 1120
},
{
"epoch": 2.83,
"learning_rate": 4.09234630556376e-05,
"loss": 2.1331,
"step": 1130
},
{
"epoch": 2.85,
"learning_rate": 4.0771222943858104e-05,
"loss": 2.1702,
"step": 1140
},
{
"epoch": 2.88,
"learning_rate": 4.061800510626515e-05,
"loss": 2.1401,
"step": 1150
},
{
"epoch": 2.9,
"learning_rate": 4.046381904149024e-05,
"loss": 2.135,
"step": 1160
},
{
"epoch": 2.93,
"learning_rate": 4.032423164795075e-05,
"loss": 2.1575,
"step": 1170
},
{
"epoch": 2.95,
"learning_rate": 4.016823233476036e-05,
"loss": 2.1694,
"step": 1180
},
{
"epoch": 2.98,
"learning_rate": 4.00112926777329e-05,
"loss": 2.1735,
"step": 1190
},
{
"epoch": 3.0,
"learning_rate": 3.985342240623145e-05,
"loss": 2.1575,
"step": 1200
},
{
"epoch": 3.03,
"learning_rate": 3.969463130731183e-05,
"loss": 2.1514,
"step": 1210
},
{
"epoch": 3.05,
"learning_rate": 3.953492922511593e-05,
"loss": 2.1289,
"step": 1220
},
{
"epoch": 3.08,
"learning_rate": 3.9374326060261405e-05,
"loss": 2.1241,
"step": 1230
},
{
"epoch": 3.1,
"learning_rate": 3.921283176922788e-05,
"loss": 2.1195,
"step": 1240
},
{
"epoch": 3.13,
"learning_rate": 3.905045636373971e-05,
"loss": 2.1274,
"step": 1250
},
{
"epoch": 3.15,
"learning_rate": 3.888720991014536e-05,
"loss": 2.156,
"step": 1260
},
{
"epoch": 3.18,
"learning_rate": 3.873955171914195e-05,
"loss": 2.1057,
"step": 1270
},
{
"epoch": 3.2,
"learning_rate": 3.857467819983128e-05,
"loss": 2.1177,
"step": 1280
},
{
"epoch": 3.23,
"learning_rate": 3.842557221261415e-05,
"loss": 2.1344,
"step": 1290
},
{
"epoch": 3.25,
"learning_rate": 3.825910852568385e-05,
"loss": 2.151,
"step": 1300
},
{
"epoch": 3.28,
"learning_rate": 3.8091822849696954e-05,
"loss": 2.1229,
"step": 1310
},
{
"epoch": 3.3,
"learning_rate": 3.7923725555410636e-05,
"loss": 2.1037,
"step": 1320
},
{
"epoch": 3.33,
"learning_rate": 3.7754827063897814e-05,
"loss": 2.081,
"step": 1330
},
{
"epoch": 3.35,
"learning_rate": 3.7585137845900994e-05,
"loss": 2.1012,
"step": 1340
},
{
"epoch": 3.38,
"learning_rate": 3.741466842118327e-05,
"loss": 2.0885,
"step": 1350
},
{
"epoch": 3.4,
"learning_rate": 3.7243429357876065e-05,
"loss": 2.1125,
"step": 1360
},
{
"epoch": 3.43,
"learning_rate": 3.707143127182402e-05,
"loss": 2.161,
"step": 1370
},
{
"epoch": 3.45,
"learning_rate": 3.689868482592684e-05,
"loss": 2.1364,
"step": 1380
},
{
"epoch": 3.48,
"learning_rate": 3.6725200729478285e-05,
"loss": 2.1098,
"step": 1390
},
{
"epoch": 3.5,
"learning_rate": 3.655098973750223e-05,
"loss": 2.0873,
"step": 1400
},
{
"epoch": 3.53,
"learning_rate": 3.6411104835303166e-05,
"loss": 2.1497,
"step": 1410
},
{
"epoch": 3.55,
"learning_rate": 3.62356126774527e-05,
"loss": 2.1473,
"step": 1420
},
{
"epoch": 3.58,
"learning_rate": 3.605942397573979e-05,
"loss": 2.1019,
"step": 1430
},
{
"epoch": 3.6,
"learning_rate": 3.588254965285841e-05,
"loss": 2.1162,
"step": 1440
},
{
"epoch": 3.63,
"learning_rate": 3.5705000674007126e-05,
"loss": 2.0695,
"step": 1450
},
{
"epoch": 3.65,
"learning_rate": 3.5526788046209314e-05,
"loss": 2.0566,
"step": 1460
},
{
"epoch": 3.68,
"learning_rate": 3.534792281763083e-05,
"loss": 2.139,
"step": 1470
},
{
"epoch": 3.7,
"learning_rate": 3.516841607689501e-05,
"loss": 2.1602,
"step": 1480
},
{
"epoch": 3.73,
"learning_rate": 3.4988278952395306e-05,
"loss": 2.1075,
"step": 1490
},
{
"epoch": 3.75,
"learning_rate": 3.480752261160538e-05,
"loss": 2.1359,
"step": 1500
},
{
"epoch": 3.78,
"learning_rate": 3.462615826038674e-05,
"loss": 2.0825,
"step": 1510
},
{
"epoch": 3.8,
"learning_rate": 3.444419714229409e-05,
"loss": 2.1189,
"step": 1520
},
{
"epoch": 3.83,
"learning_rate": 3.427993122295552e-05,
"loss": 2.1386,
"step": 1530
},
{
"epoch": 3.85,
"learning_rate": 3.4115199307912874e-05,
"loss": 2.1422,
"step": 1540
},
{
"epoch": 3.88,
"learning_rate": 3.3931627370515026e-05,
"loss": 2.1009,
"step": 1550
},
{
"epoch": 3.9,
"learning_rate": 3.374750172319454e-05,
"loss": 2.1018,
"step": 1560
},
{
"epoch": 3.93,
"learning_rate": 3.3562833780690705e-05,
"loss": 2.1312,
"step": 1570
},
{
"epoch": 3.95,
"learning_rate": 3.3377634991362025e-05,
"loss": 2.1156,
"step": 1580
},
{
"epoch": 3.98,
"learning_rate": 3.319191683647647e-05,
"loss": 2.1313,
"step": 1590
},
{
"epoch": 4.0,
"learning_rate": 3.3005690829499725e-05,
"loss": 2.1259,
"step": 1600
},
{
"epoch": 4.03,
"learning_rate": 3.2818968515381396e-05,
"loss": 2.1049,
"step": 1610
},
{
"epoch": 4.05,
"learning_rate": 3.263176146983931e-05,
"loss": 2.0938,
"step": 1620
},
{
"epoch": 4.08,
"learning_rate": 3.244408129864186e-05,
"loss": 2.0997,
"step": 1630
},
{
"epoch": 4.1,
"learning_rate": 3.2255939636888536e-05,
"loss": 2.0782,
"step": 1640
},
{
"epoch": 4.13,
"learning_rate": 3.20862272063615e-05,
"loss": 2.0884,
"step": 1650
},
{
"epoch": 4.15,
"learning_rate": 3.189724086913438e-05,
"loss": 2.1438,
"step": 1660
},
{
"epoch": 4.18,
"learning_rate": 3.170782694233712e-05,
"loss": 2.1015,
"step": 1670
},
{
"epoch": 4.2,
"learning_rate": 3.151799716855215e-05,
"loss": 2.1629,
"step": 1680
},
{
"epoch": 4.23,
"learning_rate": 3.132776331614205e-05,
"loss": 2.1147,
"step": 1690
},
{
"epoch": 4.25,
"learning_rate": 3.1156217108498623e-05,
"loss": 2.0732,
"step": 1700
},
{
"epoch": 4.28,
"learning_rate": 3.096524801766917e-05,
"loss": 2.0937,
"step": 1710
},
{
"epoch": 4.3,
"learning_rate": 3.077390911550731e-05,
"loss": 2.1234,
"step": 1720
},
{
"epoch": 4.33,
"learning_rate": 3.058221226393299e-05,
"loss": 2.1149,
"step": 1730
},
{
"epoch": 4.35,
"learning_rate": 3.039016934705694e-05,
"loss": 2.0971,
"step": 1740
},
{
"epoch": 4.38,
"learning_rate": 3.021704467555344e-05,
"loss": 2.0799,
"step": 1750
},
{
"epoch": 4.4,
"learning_rate": 3.002437705157225e-05,
"loss": 2.0822,
"step": 1760
},
{
"epoch": 4.43,
"learning_rate": 2.9831397944888833e-05,
"loss": 2.0616,
"step": 1770
},
{
"epoch": 4.45,
"learning_rate": 2.963811931910645e-05,
"loss": 2.0974,
"step": 1780
},
{
"epoch": 4.48,
"learning_rate": 2.9444553156396825e-05,
"loss": 2.0485,
"step": 1790
},
{
"epoch": 4.5,
"learning_rate": 2.925071145675733e-05,
"loss": 2.0942,
"step": 1800
},
{
"epoch": 4.53,
"learning_rate": 2.905660623726705e-05,
"loss": 2.0616,
"step": 1810
},
{
"epoch": 4.55,
"learning_rate": 2.8862249531341806e-05,
"loss": 2.1151,
"step": 1820
},
{
"epoch": 4.58,
"learning_rate": 2.8667653387988135e-05,
"loss": 2.1004,
"step": 1830
},
{
"epoch": 4.6,
"learning_rate": 2.8472829871056332e-05,
"loss": 2.0339,
"step": 1840
},
{
"epoch": 4.63,
"learning_rate": 2.8277791058492566e-05,
"loss": 2.0933,
"step": 1850
},
{
"epoch": 4.65,
"learning_rate": 2.8082549041590085e-05,
"loss": 2.1056,
"step": 1860
},
{
"epoch": 4.68,
"learning_rate": 2.788711592423966e-05,
"loss": 2.0557,
"step": 1870
},
{
"epoch": 4.7,
"learning_rate": 2.7691503822179187e-05,
"loss": 2.0898,
"step": 1880
},
{
"epoch": 4.73,
"learning_rate": 2.7495724862242624e-05,
"loss": 2.066,
"step": 1890
},
{
"epoch": 4.75,
"learning_rate": 2.7299791181608124e-05,
"loss": 2.0716,
"step": 1900
},
{
"epoch": 4.78,
"learning_rate": 2.710371492704566e-05,
"loss": 2.0517,
"step": 1910
},
{
"epoch": 4.8,
"learning_rate": 2.6907508254163987e-05,
"loss": 2.1075,
"step": 1920
},
{
"epoch": 4.83,
"learning_rate": 2.6711183326657036e-05,
"loss": 2.1195,
"step": 1930
},
{
"epoch": 4.85,
"learning_rate": 2.6514752315549847e-05,
"loss": 2.1067,
"step": 1940
},
{
"epoch": 4.88,
"learning_rate": 2.6337883768739192e-05,
"loss": 2.0755,
"step": 1950
},
{
"epoch": 4.9,
"learning_rate": 2.6141284752911205e-05,
"loss": 2.1118,
"step": 1960
},
{
"epoch": 4.93,
"learning_rate": 2.5944614983942044e-05,
"loss": 2.1153,
"step": 1970
},
{
"epoch": 4.95,
"learning_rate": 2.5747886654234967e-05,
"loss": 2.0992,
"step": 1980
},
{
"epoch": 4.98,
"learning_rate": 2.555111195982364e-05,
"loss": 2.1737,
"step": 1990
},
{
"epoch": 5.0,
"learning_rate": 2.5373985175381594e-05,
"loss": 2.0854,
"step": 2000
},
{
"epoch": 5.03,
"learning_rate": 2.5177155997790037e-05,
"loss": 2.0455,
"step": 2010
},
{
"epoch": 5.05,
"learning_rate": 2.4980315837537684e-05,
"loss": 2.0616,
"step": 2020
},
{
"epoch": 5.08,
"learning_rate": 2.4783476897591058e-05,
"loss": 2.0705,
"step": 2030
},
{
"epoch": 5.1,
"learning_rate": 2.458665138084104e-05,
"loss": 2.0996,
"step": 2040
},
{
"epoch": 5.13,
"learning_rate": 2.4389851489346364e-05,
"loss": 2.0676,
"step": 2050
},
{
"epoch": 5.15,
"learning_rate": 2.4193089423577125e-05,
"loss": 2.0819,
"step": 2060
},
{
"epoch": 5.18,
"learning_rate": 2.3996377381658457e-05,
"loss": 2.0449,
"step": 2070
},
{
"epoch": 5.2,
"learning_rate": 2.379972755861427e-05,
"loss": 2.1022,
"step": 2080
},
{
"epoch": 5.23,
"learning_rate": 2.3603152145611293e-05,
"loss": 2.0992,
"step": 2090
},
{
"epoch": 5.25,
"learning_rate": 2.3406663329203234e-05,
"loss": 2.067,
"step": 2100
},
{
"epoch": 5.28,
"learning_rate": 2.3210273290575333e-05,
"loss": 2.0757,
"step": 2110
},
{
"epoch": 5.3,
"learning_rate": 2.3053240561541107e-05,
"loss": 2.0731,
"step": 2120
},
{
"epoch": 5.33,
"learning_rate": 2.285705899947563e-05,
"loss": 2.069,
"step": 2130
},
{
"epoch": 5.35,
"learning_rate": 2.2661010287522057e-05,
"loss": 2.0535,
"step": 2140
},
{
"epoch": 5.38,
"learning_rate": 2.246510657958164e-05,
"loss": 2.0662,
"step": 2150
},
{
"epoch": 5.4,
"learning_rate": 2.2269360020566232e-05,
"loss": 2.1,
"step": 2160
},
{
"epoch": 5.43,
"learning_rate": 2.20737827456453e-05,
"loss": 2.0722,
"step": 2170
},
{
"epoch": 5.45,
"learning_rate": 2.1878386879493732e-05,
"loss": 2.0462,
"step": 2180
},
{
"epoch": 5.48,
"learning_rate": 2.1683184535540046e-05,
"loss": 2.0982,
"step": 2190
},
{
"epoch": 5.5,
"learning_rate": 2.1488187815215527e-05,
"loss": 2.0922,
"step": 2200
},
{
"epoch": 5.53,
"learning_rate": 2.1293408807203947e-05,
"loss": 2.0986,
"step": 2210
},
{
"epoch": 5.55,
"learning_rate": 2.1098859586692184e-05,
"loss": 2.0511,
"step": 2220
},
{
"epoch": 5.58,
"learning_rate": 2.090455221462156e-05,
"loss": 2.076,
"step": 2230
},
{
"epoch": 5.6,
"learning_rate": 2.074928854324268e-05,
"loss": 2.0808,
"step": 2240
},
{
"epoch": 5.63,
"learning_rate": 2.0555446843603178e-05,
"loss": 2.1077,
"step": 2250
},
{
"epoch": 5.65,
"learning_rate": 2.0361880680893558e-05,
"loss": 2.0549,
"step": 2260
},
{
"epoch": 5.68,
"learning_rate": 2.0168602055111173e-05,
"loss": 2.0473,
"step": 2270
},
{
"epoch": 5.7,
"learning_rate": 1.997562294842776e-05,
"loss": 2.1042,
"step": 2280
},
{
"epoch": 5.73,
"learning_rate": 1.9782955324446565e-05,
"loss": 2.0943,
"step": 2290
},
{
"epoch": 5.75,
"learning_rate": 1.9590611127460696e-05,
"loss": 2.075,
"step": 2300
},
{
"epoch": 5.78,
"learning_rate": 1.9398602281712604e-05,
"loss": 2.101,
"step": 2310
},
{
"epoch": 5.8,
"learning_rate": 1.920694069065492e-05,
"loss": 2.0379,
"step": 2320
},
{
"epoch": 5.83,
"learning_rate": 1.901563823621243e-05,
"loss": 2.0801,
"step": 2330
},
{
"epoch": 5.85,
"learning_rate": 1.882470677804552e-05,
"loss": 2.0945,
"step": 2340
},
{
"epoch": 5.88,
"learning_rate": 1.8672236683857954e-05,
"loss": 2.0446,
"step": 2350
},
{
"epoch": 5.9,
"learning_rate": 1.8482002831447852e-05,
"loss": 2.0638,
"step": 2360
},
{
"epoch": 5.93,
"learning_rate": 1.829217305766289e-05,
"loss": 2.0957,
"step": 2370
},
{
"epoch": 5.95,
"learning_rate": 1.8102759130865625e-05,
"loss": 2.0865,
"step": 2380
},
{
"epoch": 5.98,
"learning_rate": 1.7913772793638516e-05,
"loss": 2.0888,
"step": 2390
},
{
"epoch": 6.0,
"learning_rate": 1.7725225762055887e-05,
"loss": 2.1294,
"step": 2400
},
{
"epoch": 6.03,
"learning_rate": 1.7537129724957642e-05,
"loss": 2.0984,
"step": 2410
},
{
"epoch": 6.05,
"learning_rate": 1.7349496343224563e-05,
"loss": 2.0636,
"step": 2420
},
{
"epoch": 6.08,
"learning_rate": 1.7162337249055477e-05,
"loss": 2.0629,
"step": 2430
},
{
"epoch": 6.1,
"learning_rate": 1.697566404524606e-05,
"loss": 2.0017,
"step": 2440
},
{
"epoch": 6.13,
"learning_rate": 1.6808083163523542e-05,
"loss": 2.066,
"step": 2450
},
{
"epoch": 6.15,
"learning_rate": 1.6622365008637984e-05,
"loss": 2.0449,
"step": 2460
},
{
"epoch": 6.18,
"learning_rate": 1.6455662399320383e-05,
"loss": 2.0652,
"step": 2470
},
{
"epoch": 6.2,
"learning_rate": 1.6270940856409354e-05,
"loss": 2.135,
"step": 2480
},
{
"epoch": 6.23,
"learning_rate": 1.6086760465346993e-05,
"loss": 2.0709,
"step": 2490
},
{
"epoch": 6.25,
"learning_rate": 1.59031326442664e-05,
"loss": 2.0215,
"step": 2500
},
{
"epoch": 6.28,
"learning_rate": 1.5720068777044476e-05,
"loss": 2.048,
"step": 2510
},
{
"epoch": 6.3,
"learning_rate": 1.553758021259624e-05,
"loss": 2.0419,
"step": 2520
},
{
"epoch": 6.33,
"learning_rate": 1.5355678264171158e-05,
"loss": 2.0102,
"step": 2530
},
{
"epoch": 6.35,
"learning_rate": 1.5174374208651912e-05,
"loss": 2.0078,
"step": 2540
},
{
"epoch": 6.38,
"learning_rate": 1.4993679285855198e-05,
"loss": 2.0599,
"step": 2550
},
{
"epoch": 6.4,
"learning_rate": 1.4813604697834988e-05,
"loss": 2.0028,
"step": 2560
},
{
"epoch": 6.43,
"learning_rate": 1.4634161608187999e-05,
"loss": 2.0724,
"step": 2570
},
{
"epoch": 6.45,
"learning_rate": 1.4473211953790689e-05,
"loss": 2.0059,
"step": 2580
},
{
"epoch": 6.48,
"learning_rate": 1.4312790906544598e-05,
"loss": 2.0747,
"step": 2590
},
{
"epoch": 6.5,
"learning_rate": 1.4135175066798412e-05,
"loss": 2.0611,
"step": 2600
},
{
"epoch": 6.53,
"learning_rate": 1.395823278419065e-05,
"loss": 2.0831,
"step": 2610
},
{
"epoch": 6.55,
"learning_rate": 1.379956968828956e-05,
"loss": 2.0747,
"step": 2620
},
{
"epoch": 6.58,
"learning_rate": 1.3623937349914093e-05,
"loss": 2.0754,
"step": 2630
},
{
"epoch": 6.6,
"learning_rate": 1.3449010262497774e-05,
"loss": 2.0406,
"step": 2640
},
{
"epoch": 6.63,
"learning_rate": 1.3274799270521714e-05,
"loss": 2.0787,
"step": 2650
},
{
"epoch": 6.65,
"learning_rate": 1.3101315174073162e-05,
"loss": 2.0567,
"step": 2660
},
{
"epoch": 6.68,
"learning_rate": 1.2928568728175986e-05,
"loss": 2.047,
"step": 2670
},
{
"epoch": 6.7,
"learning_rate": 1.2756570642123938e-05,
"loss": 2.0585,
"step": 2680
},
{
"epoch": 6.73,
"learning_rate": 1.2585331578816738e-05,
"loss": 2.0462,
"step": 2690
},
{
"epoch": 6.75,
"learning_rate": 1.2414862154099003e-05,
"loss": 2.0656,
"step": 2700
},
{
"epoch": 6.78,
"learning_rate": 1.22451729361022e-05,
"loss": 2.0753,
"step": 2710
},
{
"epoch": 6.8,
"learning_rate": 1.2076274444589361e-05,
"loss": 2.0994,
"step": 2720
},
{
"epoch": 6.83,
"learning_rate": 1.1908177150303055e-05,
"loss": 2.064,
"step": 2730
},
{
"epoch": 6.85,
"learning_rate": 1.1740891474316157e-05,
"loss": 2.0103,
"step": 2740
},
{
"epoch": 6.88,
"learning_rate": 1.1574427787385852e-05,
"loss": 1.988,
"step": 2750
},
{
"epoch": 6.9,
"learning_rate": 1.1408796409310685e-05,
"loss": 2.0505,
"step": 2760
},
{
"epoch": 6.93,
"learning_rate": 1.1244007608290835e-05,
"loss": 2.0828,
"step": 2770
},
{
"epoch": 6.95,
"learning_rate": 1.1080071600291453e-05,
"loss": 2.0435,
"step": 2780
},
{
"epoch": 6.98,
"learning_rate": 1.0916998548409449e-05,
"loss": 2.0565,
"step": 2790
},
{
"epoch": 7.0,
"learning_rate": 1.0754798562243345e-05,
"loss": 2.0986,
"step": 2800
},
{
"epoch": 7.03,
"learning_rate": 1.0593481697266583e-05,
"loss": 2.0381,
"step": 2810
},
{
"epoch": 7.05,
"learning_rate": 1.0433057954204129e-05,
"loss": 2.0882,
"step": 2820
},
{
"epoch": 7.08,
"learning_rate": 1.0289448425675801e-05,
"loss": 2.0093,
"step": 2830
},
{
"epoch": 7.1,
"learning_rate": 1.0146577593768555e-05,
"loss": 2.0098,
"step": 2840
},
{
"epoch": 7.13,
"learning_rate": 9.988707322267102e-06,
"loss": 2.103,
"step": 2850
},
{
"epoch": 7.15,
"learning_rate": 9.831767665239652e-06,
"loss": 2.0467,
"step": 2860
},
{
"epoch": 7.18,
"learning_rate": 9.675768352049264e-06,
"loss": 2.0909,
"step": 2870
},
{
"epoch": 7.2,
"learning_rate": 9.520719053763028e-06,
"loss": 2.0834,
"step": 2880
},
{
"epoch": 7.23,
"learning_rate": 9.366629382552563e-06,
"loss": 2.0532,
"step": 2890
},
{
"epoch": 7.25,
"learning_rate": 9.213508891098064e-06,
"loss": 2.0169,
"step": 2900
},
{
"epoch": 7.28,
"learning_rate": 9.061367071996107e-06,
"loss": 2.0193,
"step": 2910
},
{
"epoch": 7.3,
"learning_rate": 8.910213357171187e-06,
"loss": 2.0574,
"step": 2920
},
{
"epoch": 7.33,
"learning_rate": 8.760057117290956e-06,
"loss": 2.015,
"step": 2930
},
{
"epoch": 7.35,
"learning_rate": 8.610907661185316e-06,
"loss": 2.036,
"step": 2940
},
{
"epoch": 7.38,
"learning_rate": 8.492319196321233e-06,
"loss": 2.0365,
"step": 2950
},
{
"epoch": 7.4,
"learning_rate": 8.345005209667003e-06,
"loss": 2.0107,
"step": 2960
},
{
"epoch": 7.43,
"learning_rate": 8.198723737636854e-06,
"loss": 2.0287,
"step": 2970
},
{
"epoch": 7.45,
"learning_rate": 8.053483848847221e-06,
"loss": 2.0332,
"step": 2980
},
{
"epoch": 7.48,
"learning_rate": 7.909294547342297e-06,
"loss": 2.0884,
"step": 2990
},
{
"epoch": 7.5,
"learning_rate": 7.766164772035856e-06,
"loss": 2.0417,
"step": 3000
},
{
"epoch": 7.53,
"learning_rate": 7.624103396157098e-06,
"loss": 2.0014,
"step": 3010
},
{
"epoch": 7.55,
"learning_rate": 7.48311922670058e-06,
"loss": 2.034,
"step": 3020
},
{
"epoch": 7.58,
"learning_rate": 7.343221003880157e-06,
"loss": 2.0507,
"step": 3030
},
{
"epoch": 7.6,
"learning_rate": 7.204417400587246e-06,
"loss": 2.0162,
"step": 3040
},
{
"epoch": 7.63,
"learning_rate": 7.066717021853064e-06,
"loss": 2.0098,
"step": 3050
},
{
"epoch": 7.65,
"learning_rate": 6.957356779480032e-06,
"loss": 2.0697,
"step": 3060
},
{
"epoch": 7.68,
"learning_rate": 6.821663670979842e-06,
"loss": 2.0674,
"step": 3070
},
{
"epoch": 7.7,
"learning_rate": 6.687097515585175e-06,
"loss": 2.0319,
"step": 3080
},
{
"epoch": 7.73,
"learning_rate": 6.553666655629675e-06,
"loss": 2.0591,
"step": 3090
},
{
"epoch": 7.75,
"learning_rate": 6.434556397500918e-06,
"loss": 2.035,
"step": 3100
},
{
"epoch": 7.78,
"learning_rate": 6.303305329523157e-06,
"loss": 2.0543,
"step": 3110
},
{
"epoch": 7.8,
"learning_rate": 6.173213349909729e-06,
"loss": 2.0553,
"step": 3120
},
{
"epoch": 7.83,
"learning_rate": 6.0442885236209165e-06,
"loss": 2.0659,
"step": 3130
},
{
"epoch": 7.85,
"learning_rate": 5.9165388432601446e-06,
"loss": 2.0417,
"step": 3140
},
{
"epoch": 7.88,
"learning_rate": 5.78997222857853e-06,
"loss": 2.0674,
"step": 3150
},
{
"epoch": 7.9,
"learning_rate": 5.664596525983814e-06,
"loss": 2.0321,
"step": 3160
},
{
"epoch": 7.93,
"learning_rate": 5.540419508054043e-06,
"loss": 2.0832,
"step": 3170
},
{
"epoch": 7.95,
"learning_rate": 5.417448873055617e-06,
"loss": 2.0485,
"step": 3180
},
{
"epoch": 7.98,
"learning_rate": 5.295692244466094e-06,
"loss": 2.0303,
"step": 3190
},
{
"epoch": 8.01,
"learning_rate": 5.17515717050156e-06,
"loss": 2.0567,
"step": 3200
},
{
"epoch": 8.03,
"learning_rate": 5.055851123648686e-06,
"loss": 2.0167,
"step": 3210
},
{
"epoch": 8.06,
"learning_rate": 4.937781500201474e-06,
"loss": 2.0506,
"step": 3220
},
{
"epoch": 8.08,
"learning_rate": 4.820955619802747e-06,
"loss": 2.0538,
"step": 3230
},
{
"epoch": 8.11,
"learning_rate": 4.705380724990327e-06,
"loss": 2.0169,
"step": 3240
},
{
"epoch": 8.13,
"learning_rate": 4.591063980748098e-06,
"loss": 2.0117,
"step": 3250
},
{
"epoch": 8.16,
"learning_rate": 4.478012474061774e-06,
"loss": 2.0198,
"step": 3260
},
{
"epoch": 8.18,
"learning_rate": 4.366233213479567e-06,
"loss": 2.053,
"step": 3270
},
{
"epoch": 8.21,
"learning_rate": 4.255733128677691e-06,
"loss": 2.0561,
"step": 3280
},
{
"epoch": 8.23,
"learning_rate": 4.146519070030757e-06,
"loss": 2.0559,
"step": 3290
},
{
"epoch": 8.26,
"learning_rate": 4.038597808187092e-06,
"loss": 1.9996,
"step": 3300
},
{
"epoch": 8.28,
"learning_rate": 3.931976033649021e-06,
"loss": 2.0846,
"step": 3310
},
{
"epoch": 8.31,
"learning_rate": 3.8266603563580475e-06,
"loss": 2.0455,
"step": 3320
},
{
"epoch": 8.33,
"learning_rate": 3.72265730528511e-06,
"loss": 2.0005,
"step": 3330
},
{
"epoch": 8.36,
"learning_rate": 3.640404290832433e-06,
"loss": 2.0414,
"step": 3340
},
{
"epoch": 8.38,
"learning_rate": 3.5387801599533475e-06,
"loss": 2.0333,
"step": 3350
},
{
"epoch": 8.41,
"learning_rate": 3.438486502221494e-06,
"loss": 2.0512,
"step": 3360
},
{
"epoch": 8.43,
"learning_rate": 3.3395295352712547e-06,
"loss": 2.02,
"step": 3370
},
{
"epoch": 8.46,
"learning_rate": 3.2419153938698292e-06,
"loss": 2.0627,
"step": 3380
},
{
"epoch": 8.48,
"learning_rate": 3.145650129536862e-06,
"loss": 2.0622,
"step": 3390
},
{
"epoch": 8.51,
"learning_rate": 3.0507397101693563e-06,
"loss": 2.0812,
"step": 3400
},
{
"epoch": 8.53,
"learning_rate": 2.9571900196716405e-06,
"loss": 2.0664,
"step": 3410
},
{
"epoch": 8.56,
"learning_rate": 2.8650068575906186e-06,
"loss": 2.0482,
"step": 3420
},
{
"epoch": 8.58,
"learning_rate": 2.7741959387562356e-06,
"loss": 2.0262,
"step": 3430
},
{
"epoch": 8.61,
"learning_rate": 2.684762892927184e-06,
"loss": 1.9972,
"step": 3440
},
{
"epoch": 8.63,
"learning_rate": 2.6054558172668607e-06,
"loss": 2.0461,
"step": 3450
},
{
"epoch": 8.66,
"learning_rate": 2.5186559339234085e-06,
"loss": 2.0364,
"step": 3460
},
{
"epoch": 8.68,
"learning_rate": 2.4417275136850853e-06,
"loss": 2.0423,
"step": 3470
},
{
"epoch": 8.71,
"learning_rate": 2.357580218326866e-06,
"loss": 2.0347,
"step": 3480
},
{
"epoch": 8.73,
"learning_rate": 2.2748366237709374e-06,
"loss": 2.0455,
"step": 3490
},
{
"epoch": 8.76,
"learning_rate": 2.193501859647948e-06,
"loss": 2.0784,
"step": 3500
},
{
"epoch": 8.78,
"learning_rate": 2.1215092912207858e-06,
"loss": 2.0324,
"step": 3510
},
{
"epoch": 8.81,
"learning_rate": 2.042865123994539e-06,
"loss": 2.0375,
"step": 3520
},
{
"epoch": 8.83,
"learning_rate": 1.9656441681103087e-06,
"loss": 2.0758,
"step": 3530
},
{
"epoch": 8.86,
"learning_rate": 1.8898512108266569e-06,
"loss": 2.054,
"step": 3540
},
{
"epoch": 8.88,
"learning_rate": 1.8154909508743517e-06,
"loss": 2.0084,
"step": 3550
},
{
"epoch": 8.91,
"learning_rate": 1.7425679981651399e-06,
"loss": 2.0572,
"step": 3560
},
{
"epoch": 8.93,
"learning_rate": 1.6710868735059005e-06,
"loss": 2.0107,
"step": 3570
},
{
"epoch": 8.96,
"learning_rate": 1.601052008318407e-06,
"loss": 2.0038,
"step": 3580
},
{
"epoch": 8.98,
"learning_rate": 1.5324677443645963e-06,
"loss": 2.076,
"step": 3590
},
{
"epoch": 9.01,
"learning_rate": 1.465338333477423e-06,
"loss": 2.0698,
"step": 3600
},
{
"epoch": 9.03,
"learning_rate": 1.3996679372972304e-06,
"loss": 2.024,
"step": 3610
},
{
"epoch": 9.06,
"learning_rate": 1.335460627013796e-06,
"loss": 2.0747,
"step": 3620
},
{
"epoch": 9.08,
"learning_rate": 1.2727203831139122e-06,
"loss": 2.0406,
"step": 3630
},
{
"epoch": 9.11,
"learning_rate": 1.211451095134633e-06,
"loss": 2.0228,
"step": 3640
},
{
"epoch": 9.13,
"learning_rate": 1.151656561422143e-06,
"loss": 1.998,
"step": 3650
},
{
"epoch": 9.16,
"learning_rate": 1.0933404888962662e-06,
"loss": 2.0368,
"step": 3660
},
{
"epoch": 9.18,
"learning_rate": 1.0365064928206853e-06,
"loss": 2.0481,
"step": 3670
},
{
"epoch": 9.21,
"learning_rate": 9.811580965787965e-07,
"loss": 2.0396,
"step": 3680
},
{
"epoch": 9.23,
"learning_rate": 9.272987314552811e-07,
"loss": 2.0122,
"step": 3690
},
{
"epoch": 9.26,
"learning_rate": 8.7493173642339e-07,
"loss": 2.0402,
"step": 3700
},
{
"epoch": 9.28,
"learning_rate": 8.24060357937953e-07,
"loss": 2.0351,
"step": 3710
},
{
"epoch": 9.31,
"learning_rate": 7.746877497340988e-07,
"loss": 2.0205,
"step": 3720
},
{
"epoch": 9.33,
"learning_rate": 7.315363826320005e-07,
"loss": 2.0078,
"step": 3730
},
{
"epoch": 9.36,
"learning_rate": 6.896036667805056e-07,
"loss": 1.9853,
"step": 3740
},
{
"epoch": 9.38,
"learning_rate": 6.444436016185052e-07,
"loss": 2.04,
"step": 3750
},
{
"epoch": 9.41,
"learning_rate": 6.007934419518746e-07,
"loss": 2.0622,
"step": 3760
},
{
"epoch": 9.43,
"learning_rate": 5.586558938414033e-07,
"loss": 2.0375,
"step": 3770
},
{
"epoch": 9.46,
"learning_rate": 5.180335695745803e-07,
"loss": 2.0382,
"step": 3780
},
{
"epoch": 9.48,
"learning_rate": 4.789289875036739e-07,
"loss": 2.0713,
"step": 3790
},
{
"epoch": 9.51,
"learning_rate": 4.4134457188959865e-07,
"loss": 2.0664,
"step": 3800
},
{
"epoch": 9.53,
"learning_rate": 4.052826527516207e-07,
"loss": 2.0882,
"step": 3810
},
{
"epoch": 9.56,
"learning_rate": 3.7074546572291315e-07,
"loss": 2.0404,
"step": 3820
},
{
"epoch": 9.58,
"learning_rate": 3.377351519119665e-07,
"loss": 2.0281,
"step": 3830
},
{
"epoch": 9.61,
"learning_rate": 3.062537577698338e-07,
"loss": 2.0655,
"step": 3840
},
{
"epoch": 9.63,
"learning_rate": 2.7922934437178695e-07,
"loss": 2.0364,
"step": 3850
},
{
"epoch": 9.66,
"learning_rate": 2.506581960055432e-07,
"loss": 2.0288,
"step": 3860
},
{
"epoch": 9.68,
"learning_rate": 2.262559558016325e-07,
"loss": 2.0442,
"step": 3870
},
{
"epoch": 9.71,
"learning_rate": 2.0060144733083375e-07,
"loss": 2.0335,
"step": 3880
},
{
"epoch": 9.73,
"learning_rate": 1.7648436003588896e-07,
"loss": 2.0149,
"step": 3890
},
{
"epoch": 9.76,
"learning_rate": 1.5390618903858013e-07,
"loss": 2.0491,
"step": 3900
},
{
"epoch": 9.78,
"learning_rate": 1.328683340566378e-07,
"loss": 2.0207,
"step": 3910
},
{
"epoch": 9.81,
"learning_rate": 1.133720993170162e-07,
"loss": 2.0427,
"step": 3920
},
{
"epoch": 9.83,
"learning_rate": 9.541869347499399e-08,
"loss": 1.9943,
"step": 3930
},
{
"epoch": 9.86,
"learning_rate": 7.900922953927303e-08,
"loss": 2.027,
"step": 3940
},
{
"epoch": 9.88,
"learning_rate": 6.414472480296418e-08,
"loss": 2.0079,
"step": 3950
},
{
"epoch": 9.91,
"learning_rate": 5.0826100780526633e-08,
"loss": 2.0986,
"step": 3960
},
{
"epoch": 9.93,
"learning_rate": 3.905418315063858e-08,
"loss": 2.0322,
"step": 3970
},
{
"epoch": 9.96,
"learning_rate": 2.8829701705010425e-08,
"loss": 2.0425,
"step": 3980
},
{
"epoch": 9.98,
"learning_rate": 2.0153290303134843e-08,
"loss": 2.0256,
"step": 3990
},
{
"epoch": 9.98,
"step": 3990,
"total_flos": 2.242568135280427e+18,
"train_loss": 2.1042114530290874,
"train_runtime": 6364.4425,
"train_samples_per_second": 80.37,
"train_steps_per_second": 0.627
}
],
"max_steps": 3990,
"num_train_epochs": 10,
"total_flos": 2.242568135280427e+18,
"trial_name": null,
"trial_params": null
}