generated from xuyuqing/ailab
2420 lines
48 KiB
JSON
2420 lines
48 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 9.981238273921202,
|
|
"global_step": 3990,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 4.999922507133964e-05,
|
|
"loss": 2.4224,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 4.999690033339971e-05,
|
|
"loss": 2.3638,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 4.9993025930300686e-05,
|
|
"loss": 2.3235,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 4.998760210223347e-05,
|
|
"loss": 2.2665,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.998062918544442e-05,
|
|
"loss": 2.2922,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.99721076122146e-05,
|
|
"loss": 2.2615,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.996203791083291e-05,
|
|
"loss": 2.2748,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.9950420705563365e-05,
|
|
"loss": 2.265,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9937256716606394e-05,
|
|
"loss": 2.291,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.992254676005419e-05,
|
|
"loss": 2.2525,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.990629174784009e-05,
|
|
"loss": 2.2568,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.9888492687682096e-05,
|
|
"loss": 2.1978,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9869150683020335e-05,
|
|
"loss": 2.2498,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.984826693294874e-05,
|
|
"loss": 2.2394,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.982584273214061e-05,
|
|
"loss": 2.2257,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.98018794707684e-05,
|
|
"loss": 2.1823,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.977637863441759e-05,
|
|
"loss": 2.2213,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.9749341803994465e-05,
|
|
"loss": 2.232,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.972077065562821e-05,
|
|
"loss": 2.2008,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.969066696056699e-05,
|
|
"loss": 2.2421,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.965903258506806e-05,
|
|
"loss": 2.1941,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.9625869490282176e-05,
|
|
"loss": 2.1979,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.959117973213194e-05,
|
|
"loss": 2.182,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.955496546118439e-05,
|
|
"loss": 2.2056,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.951722892251762e-05,
|
|
"loss": 2.2046,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.947797245558168e-05,
|
|
"loss": 2.2378,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.9437198494053464e-05,
|
|
"loss": 2.2175,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.9394909565685894e-05,
|
|
"loss": 2.2233,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.935110829215117e-05,
|
|
"loss": 2.2411,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 4.9305797388878264e-05,
|
|
"loss": 2.2573,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.92589796648846e-05,
|
|
"loss": 2.2064,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 4.921065802260185e-05,
|
|
"loss": 2.2152,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 4.916083545769607e-05,
|
|
"loss": 2.2198,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.9109515058881925e-05,
|
|
"loss": 2.224,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.905670000773126e-05,
|
|
"loss": 2.2385,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.9002393578475816e-05,
|
|
"loss": 2.1891,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.894659913780427e-05,
|
|
"loss": 2.2387,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.888932014465352e-05,
|
|
"loss": 2.1901,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.883056014999423e-05,
|
|
"loss": 2.1921,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.877032279661073e-05,
|
|
"loss": 2.2066,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.870861181887514e-05,
|
|
"loss": 2.1981,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 4.864543104251587e-05,
|
|
"loss": 2.1755,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.8580784384380486e-05,
|
|
"loss": 2.1689,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.8528014313136675e-05,
|
|
"loss": 2.1933,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 4.8460739228178806e-05,
|
|
"loss": 2.1953,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 4.839200971128324e-05,
|
|
"loss": 2.2033,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 4.832183002328776e-05,
|
|
"loss": 2.1631,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.8250204514932517e-05,
|
|
"loss": 2.2011,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.817713762659024e-05,
|
|
"loss": 2.1802,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.810263388799101e-05,
|
|
"loss": 2.2153,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.80266979179414e-05,
|
|
"loss": 2.1755,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.7949334424038176e-05,
|
|
"loss": 2.1876,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.787054820237644e-05,
|
|
"loss": 2.2001,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 4.77903441372523e-05,
|
|
"loss": 2.1986,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.771695233018754e-05,
|
|
"loss": 2.1964,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.764241978167314e-05,
|
|
"loss": 2.2122,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.755827248631491e-05,
|
|
"loss": 2.1796,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 4.747272670680646e-05,
|
|
"loss": 2.1722,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 4.738578774649787e-05,
|
|
"loss": 2.1656,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.729746099510852e-05,
|
|
"loss": 2.1352,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.720775192839289e-05,
|
|
"loss": 2.1198,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.711666610780115e-05,
|
|
"loss": 2.2026,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.702420918013432e-05,
|
|
"loss": 2.1542,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 4.693038687719424e-05,
|
|
"loss": 2.138,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 4.6835205015428246e-05,
|
|
"loss": 2.2104,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 4.675808460613018e-05,
|
|
"loss": 2.2081,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 4.666047046472874e-05,
|
|
"loss": 2.1423,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 4.656151349777851e-05,
|
|
"loss": 2.1888,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 4.6461219840046654e-05,
|
|
"loss": 2.1702,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 4.635959570916757e-05,
|
|
"loss": 2.1713,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 4.625664740525738e-05,
|
|
"loss": 2.1861,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 4.6152381310523387e-05,
|
|
"loss": 2.157,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 4.60468038888684e-05,
|
|
"loss": 2.181,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 4.593992168549001e-05,
|
|
"loss": 2.1946,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 4.583174132647484e-05,
|
|
"loss": 2.1926,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 4.572226951838773e-05,
|
|
"loss": 2.2523,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 4.5622646309652794e-05,
|
|
"loss": 2.159,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 4.5533222818030376e-05,
|
|
"loss": 2.19,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 4.542029822186779e-05,
|
|
"loss": 2.1623,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 4.5306107683757584e-05,
|
|
"loss": 2.152,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 4.519065828286142e-05,
|
|
"loss": 2.1926,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 4.507395717638325e-05,
|
|
"loss": 2.1226,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 4.495601159912566e-05,
|
|
"loss": 2.1416,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 4.483682886304129e-05,
|
|
"loss": 2.1558,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 4.4716416356779576e-05,
|
|
"loss": 2.1399,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 4.4594781545228715e-05,
|
|
"loss": 2.1637,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 4.4471931969052816e-05,
|
|
"loss": 2.1307,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 4.434787524422448e-05,
|
|
"loss": 2.159,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 4.422261906155263e-05,
|
|
"loss": 2.1421,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 4.409617118620574e-05,
|
|
"loss": 2.1185,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 4.396853945723043e-05,
|
|
"loss": 2.1477,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 4.385266524442241e-05,
|
|
"loss": 2.0955,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 4.372280605258533e-05,
|
|
"loss": 2.1274,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 4.3591786153627247e-05,
|
|
"loss": 2.1428,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 4.345961367003414e-05,
|
|
"loss": 2.1694,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 4.332629679574566e-05,
|
|
"loss": 2.1898,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 4.319184379564716e-05,
|
|
"loss": 2.1756,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 4.305626300505728e-05,
|
|
"loss": 2.1476,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 4.2919562829211283e-05,
|
|
"loss": 2.0978,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 4.2795582599412757e-05,
|
|
"loss": 2.101,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 4.265677899611984e-05,
|
|
"loss": 2.1342,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 4.2516880773299425e-05,
|
|
"loss": 2.1712,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 4.23758966038429e-05,
|
|
"loss": 2.1545,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 4.223383522796415e-05,
|
|
"loss": 2.187,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 4.209070545265771e-05,
|
|
"loss": 2.125,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 4.1946516151152785e-05,
|
|
"loss": 2.1419,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 4.1801276262363146e-05,
|
|
"loss": 2.1932,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 4.1654994790333e-05,
|
|
"loss": 2.1424,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 4.150768080367876e-05,
|
|
"loss": 2.172,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 4.135934343502685e-05,
|
|
"loss": 2.1384,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 4.120999188044754e-05,
|
|
"loss": 2.1569,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 4.1059635398884835e-05,
|
|
"loss": 2.1748,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 4.09234630556376e-05,
|
|
"loss": 2.1331,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 4.0771222943858104e-05,
|
|
"loss": 2.1702,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 4.061800510626515e-05,
|
|
"loss": 2.1401,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 4.046381904149024e-05,
|
|
"loss": 2.135,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 4.032423164795075e-05,
|
|
"loss": 2.1575,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 4.016823233476036e-05,
|
|
"loss": 2.1694,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 4.00112926777329e-05,
|
|
"loss": 2.1735,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 3.985342240623145e-05,
|
|
"loss": 2.1575,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 3.03,
|
|
"learning_rate": 3.969463130731183e-05,
|
|
"loss": 2.1514,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 3.953492922511593e-05,
|
|
"loss": 2.1289,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"learning_rate": 3.9374326060261405e-05,
|
|
"loss": 2.1241,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 3.921283176922788e-05,
|
|
"loss": 2.1195,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 3.905045636373971e-05,
|
|
"loss": 2.1274,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 3.888720991014536e-05,
|
|
"loss": 2.156,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 3.873955171914195e-05,
|
|
"loss": 2.1057,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 3.857467819983128e-05,
|
|
"loss": 2.1177,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 3.842557221261415e-05,
|
|
"loss": 2.1344,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"learning_rate": 3.825910852568385e-05,
|
|
"loss": 2.151,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 3.8091822849696954e-05,
|
|
"loss": 2.1229,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 3.3,
|
|
"learning_rate": 3.7923725555410636e-05,
|
|
"loss": 2.1037,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 3.33,
|
|
"learning_rate": 3.7754827063897814e-05,
|
|
"loss": 2.081,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 3.7585137845900994e-05,
|
|
"loss": 2.1012,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 3.38,
|
|
"learning_rate": 3.741466842118327e-05,
|
|
"loss": 2.0885,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 3.7243429357876065e-05,
|
|
"loss": 2.1125,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 3.707143127182402e-05,
|
|
"loss": 2.161,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 3.689868482592684e-05,
|
|
"loss": 2.1364,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 3.6725200729478285e-05,
|
|
"loss": 2.1098,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 3.655098973750223e-05,
|
|
"loss": 2.0873,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 3.6411104835303166e-05,
|
|
"loss": 2.1497,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 3.55,
|
|
"learning_rate": 3.62356126774527e-05,
|
|
"loss": 2.1473,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 3.605942397573979e-05,
|
|
"loss": 2.1019,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"learning_rate": 3.588254965285841e-05,
|
|
"loss": 2.1162,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 3.63,
|
|
"learning_rate": 3.5705000674007126e-05,
|
|
"loss": 2.0695,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 3.5526788046209314e-05,
|
|
"loss": 2.0566,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"learning_rate": 3.534792281763083e-05,
|
|
"loss": 2.139,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 3.516841607689501e-05,
|
|
"loss": 2.1602,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 3.4988278952395306e-05,
|
|
"loss": 2.1075,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 3.480752261160538e-05,
|
|
"loss": 2.1359,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 3.462615826038674e-05,
|
|
"loss": 2.0825,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 3.444419714229409e-05,
|
|
"loss": 2.1189,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 3.427993122295552e-05,
|
|
"loss": 2.1386,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 3.85,
|
|
"learning_rate": 3.4115199307912874e-05,
|
|
"loss": 2.1422,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 3.3931627370515026e-05,
|
|
"loss": 2.1009,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"learning_rate": 3.374750172319454e-05,
|
|
"loss": 2.1018,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 3.93,
|
|
"learning_rate": 3.3562833780690705e-05,
|
|
"loss": 2.1312,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 3.95,
|
|
"learning_rate": 3.3377634991362025e-05,
|
|
"loss": 2.1156,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 3.319191683647647e-05,
|
|
"loss": 2.1313,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 3.3005690829499725e-05,
|
|
"loss": 2.1259,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 3.2818968515381396e-05,
|
|
"loss": 2.1049,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 3.263176146983931e-05,
|
|
"loss": 2.0938,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 3.244408129864186e-05,
|
|
"loss": 2.0997,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 3.2255939636888536e-05,
|
|
"loss": 2.0782,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.20862272063615e-05,
|
|
"loss": 2.0884,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.189724086913438e-05,
|
|
"loss": 2.1438,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.170782694233712e-05,
|
|
"loss": 2.1015,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"learning_rate": 3.151799716855215e-05,
|
|
"loss": 2.1629,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 4.23,
|
|
"learning_rate": 3.132776331614205e-05,
|
|
"loss": 2.1147,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"learning_rate": 3.1156217108498623e-05,
|
|
"loss": 2.0732,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"learning_rate": 3.096524801766917e-05,
|
|
"loss": 2.0937,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 3.077390911550731e-05,
|
|
"loss": 2.1234,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 4.33,
|
|
"learning_rate": 3.058221226393299e-05,
|
|
"loss": 2.1149,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 3.039016934705694e-05,
|
|
"loss": 2.0971,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 3.021704467555344e-05,
|
|
"loss": 2.0799,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 3.002437705157225e-05,
|
|
"loss": 2.0822,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 2.9831397944888833e-05,
|
|
"loss": 2.0616,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 2.963811931910645e-05,
|
|
"loss": 2.0974,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 2.9444553156396825e-05,
|
|
"loss": 2.0485,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"learning_rate": 2.925071145675733e-05,
|
|
"loss": 2.0942,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 2.905660623726705e-05,
|
|
"loss": 2.0616,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 4.55,
|
|
"learning_rate": 2.8862249531341806e-05,
|
|
"loss": 2.1151,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 2.8667653387988135e-05,
|
|
"loss": 2.1004,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 2.8472829871056332e-05,
|
|
"loss": 2.0339,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 2.8277791058492566e-05,
|
|
"loss": 2.0933,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 2.8082549041590085e-05,
|
|
"loss": 2.1056,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 2.788711592423966e-05,
|
|
"loss": 2.0557,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 2.7691503822179187e-05,
|
|
"loss": 2.0898,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 2.7495724862242624e-05,
|
|
"loss": 2.066,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 2.7299791181608124e-05,
|
|
"loss": 2.0716,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.710371492704566e-05,
|
|
"loss": 2.0517,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 2.6907508254163987e-05,
|
|
"loss": 2.1075,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 2.6711183326657036e-05,
|
|
"loss": 2.1195,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 2.6514752315549847e-05,
|
|
"loss": 2.1067,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 2.6337883768739192e-05,
|
|
"loss": 2.0755,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 2.6141284752911205e-05,
|
|
"loss": 2.1118,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 4.93,
|
|
"learning_rate": 2.5944614983942044e-05,
|
|
"loss": 2.1153,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 2.5747886654234967e-05,
|
|
"loss": 2.0992,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 2.555111195982364e-05,
|
|
"loss": 2.1737,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 2.5373985175381594e-05,
|
|
"loss": 2.0854,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 5.03,
|
|
"learning_rate": 2.5177155997790037e-05,
|
|
"loss": 2.0455,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 5.05,
|
|
"learning_rate": 2.4980315837537684e-05,
|
|
"loss": 2.0616,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 5.08,
|
|
"learning_rate": 2.4783476897591058e-05,
|
|
"loss": 2.0705,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 5.1,
|
|
"learning_rate": 2.458665138084104e-05,
|
|
"loss": 2.0996,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 5.13,
|
|
"learning_rate": 2.4389851489346364e-05,
|
|
"loss": 2.0676,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 5.15,
|
|
"learning_rate": 2.4193089423577125e-05,
|
|
"loss": 2.0819,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 5.18,
|
|
"learning_rate": 2.3996377381658457e-05,
|
|
"loss": 2.0449,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 5.2,
|
|
"learning_rate": 2.379972755861427e-05,
|
|
"loss": 2.1022,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 5.23,
|
|
"learning_rate": 2.3603152145611293e-05,
|
|
"loss": 2.0992,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 5.25,
|
|
"learning_rate": 2.3406663329203234e-05,
|
|
"loss": 2.067,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 5.28,
|
|
"learning_rate": 2.3210273290575333e-05,
|
|
"loss": 2.0757,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 5.3,
|
|
"learning_rate": 2.3053240561541107e-05,
|
|
"loss": 2.0731,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 5.33,
|
|
"learning_rate": 2.285705899947563e-05,
|
|
"loss": 2.069,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 5.35,
|
|
"learning_rate": 2.2661010287522057e-05,
|
|
"loss": 2.0535,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 5.38,
|
|
"learning_rate": 2.246510657958164e-05,
|
|
"loss": 2.0662,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 5.4,
|
|
"learning_rate": 2.2269360020566232e-05,
|
|
"loss": 2.1,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 5.43,
|
|
"learning_rate": 2.20737827456453e-05,
|
|
"loss": 2.0722,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 5.45,
|
|
"learning_rate": 2.1878386879493732e-05,
|
|
"loss": 2.0462,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 5.48,
|
|
"learning_rate": 2.1683184535540046e-05,
|
|
"loss": 2.0982,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"learning_rate": 2.1488187815215527e-05,
|
|
"loss": 2.0922,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 5.53,
|
|
"learning_rate": 2.1293408807203947e-05,
|
|
"loss": 2.0986,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 5.55,
|
|
"learning_rate": 2.1098859586692184e-05,
|
|
"loss": 2.0511,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 5.58,
|
|
"learning_rate": 2.090455221462156e-05,
|
|
"loss": 2.076,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 5.6,
|
|
"learning_rate": 2.074928854324268e-05,
|
|
"loss": 2.0808,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 5.63,
|
|
"learning_rate": 2.0555446843603178e-05,
|
|
"loss": 2.1077,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 5.65,
|
|
"learning_rate": 2.0361880680893558e-05,
|
|
"loss": 2.0549,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 5.68,
|
|
"learning_rate": 2.0168602055111173e-05,
|
|
"loss": 2.0473,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 5.7,
|
|
"learning_rate": 1.997562294842776e-05,
|
|
"loss": 2.1042,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 5.73,
|
|
"learning_rate": 1.9782955324446565e-05,
|
|
"loss": 2.0943,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 5.75,
|
|
"learning_rate": 1.9590611127460696e-05,
|
|
"loss": 2.075,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 5.78,
|
|
"learning_rate": 1.9398602281712604e-05,
|
|
"loss": 2.101,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 5.8,
|
|
"learning_rate": 1.920694069065492e-05,
|
|
"loss": 2.0379,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 5.83,
|
|
"learning_rate": 1.901563823621243e-05,
|
|
"loss": 2.0801,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 5.85,
|
|
"learning_rate": 1.882470677804552e-05,
|
|
"loss": 2.0945,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 5.88,
|
|
"learning_rate": 1.8672236683857954e-05,
|
|
"loss": 2.0446,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 5.9,
|
|
"learning_rate": 1.8482002831447852e-05,
|
|
"loss": 2.0638,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 5.93,
|
|
"learning_rate": 1.829217305766289e-05,
|
|
"loss": 2.0957,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 5.95,
|
|
"learning_rate": 1.8102759130865625e-05,
|
|
"loss": 2.0865,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 5.98,
|
|
"learning_rate": 1.7913772793638516e-05,
|
|
"loss": 2.0888,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"learning_rate": 1.7725225762055887e-05,
|
|
"loss": 2.1294,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 6.03,
|
|
"learning_rate": 1.7537129724957642e-05,
|
|
"loss": 2.0984,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 6.05,
|
|
"learning_rate": 1.7349496343224563e-05,
|
|
"loss": 2.0636,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 6.08,
|
|
"learning_rate": 1.7162337249055477e-05,
|
|
"loss": 2.0629,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 6.1,
|
|
"learning_rate": 1.697566404524606e-05,
|
|
"loss": 2.0017,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 6.13,
|
|
"learning_rate": 1.6808083163523542e-05,
|
|
"loss": 2.066,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 6.15,
|
|
"learning_rate": 1.6622365008637984e-05,
|
|
"loss": 2.0449,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 6.18,
|
|
"learning_rate": 1.6455662399320383e-05,
|
|
"loss": 2.0652,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 6.2,
|
|
"learning_rate": 1.6270940856409354e-05,
|
|
"loss": 2.135,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 6.23,
|
|
"learning_rate": 1.6086760465346993e-05,
|
|
"loss": 2.0709,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"learning_rate": 1.59031326442664e-05,
|
|
"loss": 2.0215,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 6.28,
|
|
"learning_rate": 1.5720068777044476e-05,
|
|
"loss": 2.048,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 6.3,
|
|
"learning_rate": 1.553758021259624e-05,
|
|
"loss": 2.0419,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 6.33,
|
|
"learning_rate": 1.5355678264171158e-05,
|
|
"loss": 2.0102,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 6.35,
|
|
"learning_rate": 1.5174374208651912e-05,
|
|
"loss": 2.0078,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 6.38,
|
|
"learning_rate": 1.4993679285855198e-05,
|
|
"loss": 2.0599,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 6.4,
|
|
"learning_rate": 1.4813604697834988e-05,
|
|
"loss": 2.0028,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 6.43,
|
|
"learning_rate": 1.4634161608187999e-05,
|
|
"loss": 2.0724,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 6.45,
|
|
"learning_rate": 1.4473211953790689e-05,
|
|
"loss": 2.0059,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 6.48,
|
|
"learning_rate": 1.4312790906544598e-05,
|
|
"loss": 2.0747,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"learning_rate": 1.4135175066798412e-05,
|
|
"loss": 2.0611,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 6.53,
|
|
"learning_rate": 1.395823278419065e-05,
|
|
"loss": 2.0831,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 6.55,
|
|
"learning_rate": 1.379956968828956e-05,
|
|
"loss": 2.0747,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 6.58,
|
|
"learning_rate": 1.3623937349914093e-05,
|
|
"loss": 2.0754,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 6.6,
|
|
"learning_rate": 1.3449010262497774e-05,
|
|
"loss": 2.0406,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 6.63,
|
|
"learning_rate": 1.3274799270521714e-05,
|
|
"loss": 2.0787,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 6.65,
|
|
"learning_rate": 1.3101315174073162e-05,
|
|
"loss": 2.0567,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 6.68,
|
|
"learning_rate": 1.2928568728175986e-05,
|
|
"loss": 2.047,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 6.7,
|
|
"learning_rate": 1.2756570642123938e-05,
|
|
"loss": 2.0585,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 6.73,
|
|
"learning_rate": 1.2585331578816738e-05,
|
|
"loss": 2.0462,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 6.75,
|
|
"learning_rate": 1.2414862154099003e-05,
|
|
"loss": 2.0656,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 6.78,
|
|
"learning_rate": 1.22451729361022e-05,
|
|
"loss": 2.0753,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 6.8,
|
|
"learning_rate": 1.2076274444589361e-05,
|
|
"loss": 2.0994,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 6.83,
|
|
"learning_rate": 1.1908177150303055e-05,
|
|
"loss": 2.064,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 6.85,
|
|
"learning_rate": 1.1740891474316157e-05,
|
|
"loss": 2.0103,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 6.88,
|
|
"learning_rate": 1.1574427787385852e-05,
|
|
"loss": 1.988,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 6.9,
|
|
"learning_rate": 1.1408796409310685e-05,
|
|
"loss": 2.0505,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 6.93,
|
|
"learning_rate": 1.1244007608290835e-05,
|
|
"loss": 2.0828,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 6.95,
|
|
"learning_rate": 1.1080071600291453e-05,
|
|
"loss": 2.0435,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 6.98,
|
|
"learning_rate": 1.0916998548409449e-05,
|
|
"loss": 2.0565,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"learning_rate": 1.0754798562243345e-05,
|
|
"loss": 2.0986,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 7.03,
|
|
"learning_rate": 1.0593481697266583e-05,
|
|
"loss": 2.0381,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 7.05,
|
|
"learning_rate": 1.0433057954204129e-05,
|
|
"loss": 2.0882,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 7.08,
|
|
"learning_rate": 1.0289448425675801e-05,
|
|
"loss": 2.0093,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 7.1,
|
|
"learning_rate": 1.0146577593768555e-05,
|
|
"loss": 2.0098,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 7.13,
|
|
"learning_rate": 9.988707322267102e-06,
|
|
"loss": 2.103,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 7.15,
|
|
"learning_rate": 9.831767665239652e-06,
|
|
"loss": 2.0467,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 7.18,
|
|
"learning_rate": 9.675768352049264e-06,
|
|
"loss": 2.0909,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 7.2,
|
|
"learning_rate": 9.520719053763028e-06,
|
|
"loss": 2.0834,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 7.23,
|
|
"learning_rate": 9.366629382552563e-06,
|
|
"loss": 2.0532,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 7.25,
|
|
"learning_rate": 9.213508891098064e-06,
|
|
"loss": 2.0169,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 7.28,
|
|
"learning_rate": 9.061367071996107e-06,
|
|
"loss": 2.0193,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 7.3,
|
|
"learning_rate": 8.910213357171187e-06,
|
|
"loss": 2.0574,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 7.33,
|
|
"learning_rate": 8.760057117290956e-06,
|
|
"loss": 2.015,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 7.35,
|
|
"learning_rate": 8.610907661185316e-06,
|
|
"loss": 2.036,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 7.38,
|
|
"learning_rate": 8.492319196321233e-06,
|
|
"loss": 2.0365,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 7.4,
|
|
"learning_rate": 8.345005209667003e-06,
|
|
"loss": 2.0107,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 7.43,
|
|
"learning_rate": 8.198723737636854e-06,
|
|
"loss": 2.0287,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 7.45,
|
|
"learning_rate": 8.053483848847221e-06,
|
|
"loss": 2.0332,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 7.48,
|
|
"learning_rate": 7.909294547342297e-06,
|
|
"loss": 2.0884,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 7.5,
|
|
"learning_rate": 7.766164772035856e-06,
|
|
"loss": 2.0417,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 7.53,
|
|
"learning_rate": 7.624103396157098e-06,
|
|
"loss": 2.0014,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 7.55,
|
|
"learning_rate": 7.48311922670058e-06,
|
|
"loss": 2.034,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 7.58,
|
|
"learning_rate": 7.343221003880157e-06,
|
|
"loss": 2.0507,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 7.6,
|
|
"learning_rate": 7.204417400587246e-06,
|
|
"loss": 2.0162,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 7.63,
|
|
"learning_rate": 7.066717021853064e-06,
|
|
"loss": 2.0098,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 7.65,
|
|
"learning_rate": 6.957356779480032e-06,
|
|
"loss": 2.0697,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 7.68,
|
|
"learning_rate": 6.821663670979842e-06,
|
|
"loss": 2.0674,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 7.7,
|
|
"learning_rate": 6.687097515585175e-06,
|
|
"loss": 2.0319,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 7.73,
|
|
"learning_rate": 6.553666655629675e-06,
|
|
"loss": 2.0591,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 7.75,
|
|
"learning_rate": 6.434556397500918e-06,
|
|
"loss": 2.035,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 7.78,
|
|
"learning_rate": 6.303305329523157e-06,
|
|
"loss": 2.0543,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 7.8,
|
|
"learning_rate": 6.173213349909729e-06,
|
|
"loss": 2.0553,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 7.83,
|
|
"learning_rate": 6.0442885236209165e-06,
|
|
"loss": 2.0659,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 7.85,
|
|
"learning_rate": 5.9165388432601446e-06,
|
|
"loss": 2.0417,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 7.88,
|
|
"learning_rate": 5.78997222857853e-06,
|
|
"loss": 2.0674,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 7.9,
|
|
"learning_rate": 5.664596525983814e-06,
|
|
"loss": 2.0321,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 7.93,
|
|
"learning_rate": 5.540419508054043e-06,
|
|
"loss": 2.0832,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 7.95,
|
|
"learning_rate": 5.417448873055617e-06,
|
|
"loss": 2.0485,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 7.98,
|
|
"learning_rate": 5.295692244466094e-06,
|
|
"loss": 2.0303,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 8.01,
|
|
"learning_rate": 5.17515717050156e-06,
|
|
"loss": 2.0567,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 8.03,
|
|
"learning_rate": 5.055851123648686e-06,
|
|
"loss": 2.0167,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 8.06,
|
|
"learning_rate": 4.937781500201474e-06,
|
|
"loss": 2.0506,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 8.08,
|
|
"learning_rate": 4.820955619802747e-06,
|
|
"loss": 2.0538,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 8.11,
|
|
"learning_rate": 4.705380724990327e-06,
|
|
"loss": 2.0169,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 8.13,
|
|
"learning_rate": 4.591063980748098e-06,
|
|
"loss": 2.0117,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 8.16,
|
|
"learning_rate": 4.478012474061774e-06,
|
|
"loss": 2.0198,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 8.18,
|
|
"learning_rate": 4.366233213479567e-06,
|
|
"loss": 2.053,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 8.21,
|
|
"learning_rate": 4.255733128677691e-06,
|
|
"loss": 2.0561,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 8.23,
|
|
"learning_rate": 4.146519070030757e-06,
|
|
"loss": 2.0559,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 8.26,
|
|
"learning_rate": 4.038597808187092e-06,
|
|
"loss": 1.9996,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 8.28,
|
|
"learning_rate": 3.931976033649021e-06,
|
|
"loss": 2.0846,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 8.31,
|
|
"learning_rate": 3.8266603563580475e-06,
|
|
"loss": 2.0455,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 8.33,
|
|
"learning_rate": 3.72265730528511e-06,
|
|
"loss": 2.0005,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 8.36,
|
|
"learning_rate": 3.640404290832433e-06,
|
|
"loss": 2.0414,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 8.38,
|
|
"learning_rate": 3.5387801599533475e-06,
|
|
"loss": 2.0333,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 8.41,
|
|
"learning_rate": 3.438486502221494e-06,
|
|
"loss": 2.0512,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 8.43,
|
|
"learning_rate": 3.3395295352712547e-06,
|
|
"loss": 2.02,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 8.46,
|
|
"learning_rate": 3.2419153938698292e-06,
|
|
"loss": 2.0627,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 8.48,
|
|
"learning_rate": 3.145650129536862e-06,
|
|
"loss": 2.0622,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 8.51,
|
|
"learning_rate": 3.0507397101693563e-06,
|
|
"loss": 2.0812,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 8.53,
|
|
"learning_rate": 2.9571900196716405e-06,
|
|
"loss": 2.0664,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 8.56,
|
|
"learning_rate": 2.8650068575906186e-06,
|
|
"loss": 2.0482,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 8.58,
|
|
"learning_rate": 2.7741959387562356e-06,
|
|
"loss": 2.0262,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 8.61,
|
|
"learning_rate": 2.684762892927184e-06,
|
|
"loss": 1.9972,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 8.63,
|
|
"learning_rate": 2.6054558172668607e-06,
|
|
"loss": 2.0461,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 8.66,
|
|
"learning_rate": 2.5186559339234085e-06,
|
|
"loss": 2.0364,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 8.68,
|
|
"learning_rate": 2.4417275136850853e-06,
|
|
"loss": 2.0423,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 8.71,
|
|
"learning_rate": 2.357580218326866e-06,
|
|
"loss": 2.0347,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 8.73,
|
|
"learning_rate": 2.2748366237709374e-06,
|
|
"loss": 2.0455,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 8.76,
|
|
"learning_rate": 2.193501859647948e-06,
|
|
"loss": 2.0784,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 8.78,
|
|
"learning_rate": 2.1215092912207858e-06,
|
|
"loss": 2.0324,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 8.81,
|
|
"learning_rate": 2.042865123994539e-06,
|
|
"loss": 2.0375,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 8.83,
|
|
"learning_rate": 1.9656441681103087e-06,
|
|
"loss": 2.0758,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 8.86,
|
|
"learning_rate": 1.8898512108266569e-06,
|
|
"loss": 2.054,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 8.88,
|
|
"learning_rate": 1.8154909508743517e-06,
|
|
"loss": 2.0084,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 8.91,
|
|
"learning_rate": 1.7425679981651399e-06,
|
|
"loss": 2.0572,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 8.93,
|
|
"learning_rate": 1.6710868735059005e-06,
|
|
"loss": 2.0107,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 8.96,
|
|
"learning_rate": 1.601052008318407e-06,
|
|
"loss": 2.0038,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 8.98,
|
|
"learning_rate": 1.5324677443645963e-06,
|
|
"loss": 2.076,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 9.01,
|
|
"learning_rate": 1.465338333477423e-06,
|
|
"loss": 2.0698,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 9.03,
|
|
"learning_rate": 1.3996679372972304e-06,
|
|
"loss": 2.024,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 9.06,
|
|
"learning_rate": 1.335460627013796e-06,
|
|
"loss": 2.0747,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 9.08,
|
|
"learning_rate": 1.2727203831139122e-06,
|
|
"loss": 2.0406,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 9.11,
|
|
"learning_rate": 1.211451095134633e-06,
|
|
"loss": 2.0228,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 9.13,
|
|
"learning_rate": 1.151656561422143e-06,
|
|
"loss": 1.998,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 9.16,
|
|
"learning_rate": 1.0933404888962662e-06,
|
|
"loss": 2.0368,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 9.18,
|
|
"learning_rate": 1.0365064928206853e-06,
|
|
"loss": 2.0481,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 9.21,
|
|
"learning_rate": 9.811580965787965e-07,
|
|
"loss": 2.0396,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 9.23,
|
|
"learning_rate": 9.272987314552811e-07,
|
|
"loss": 2.0122,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 9.26,
|
|
"learning_rate": 8.7493173642339e-07,
|
|
"loss": 2.0402,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 9.28,
|
|
"learning_rate": 8.24060357937953e-07,
|
|
"loss": 2.0351,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 9.31,
|
|
"learning_rate": 7.746877497340988e-07,
|
|
"loss": 2.0205,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 9.33,
|
|
"learning_rate": 7.315363826320005e-07,
|
|
"loss": 2.0078,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 9.36,
|
|
"learning_rate": 6.896036667805056e-07,
|
|
"loss": 1.9853,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 9.38,
|
|
"learning_rate": 6.444436016185052e-07,
|
|
"loss": 2.04,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 9.41,
|
|
"learning_rate": 6.007934419518746e-07,
|
|
"loss": 2.0622,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 9.43,
|
|
"learning_rate": 5.586558938414033e-07,
|
|
"loss": 2.0375,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 9.46,
|
|
"learning_rate": 5.180335695745803e-07,
|
|
"loss": 2.0382,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 9.48,
|
|
"learning_rate": 4.789289875036739e-07,
|
|
"loss": 2.0713,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 9.51,
|
|
"learning_rate": 4.4134457188959865e-07,
|
|
"loss": 2.0664,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 9.53,
|
|
"learning_rate": 4.052826527516207e-07,
|
|
"loss": 2.0882,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 9.56,
|
|
"learning_rate": 3.7074546572291315e-07,
|
|
"loss": 2.0404,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 9.58,
|
|
"learning_rate": 3.377351519119665e-07,
|
|
"loss": 2.0281,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 9.61,
|
|
"learning_rate": 3.062537577698338e-07,
|
|
"loss": 2.0655,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 9.63,
|
|
"learning_rate": 2.7922934437178695e-07,
|
|
"loss": 2.0364,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 9.66,
|
|
"learning_rate": 2.506581960055432e-07,
|
|
"loss": 2.0288,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 9.68,
|
|
"learning_rate": 2.262559558016325e-07,
|
|
"loss": 2.0442,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 9.71,
|
|
"learning_rate": 2.0060144733083375e-07,
|
|
"loss": 2.0335,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 9.73,
|
|
"learning_rate": 1.7648436003588896e-07,
|
|
"loss": 2.0149,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 9.76,
|
|
"learning_rate": 1.5390618903858013e-07,
|
|
"loss": 2.0491,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 9.78,
|
|
"learning_rate": 1.328683340566378e-07,
|
|
"loss": 2.0207,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 9.81,
|
|
"learning_rate": 1.133720993170162e-07,
|
|
"loss": 2.0427,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 9.83,
|
|
"learning_rate": 9.541869347499399e-08,
|
|
"loss": 1.9943,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 9.86,
|
|
"learning_rate": 7.900922953927303e-08,
|
|
"loss": 2.027,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 9.88,
|
|
"learning_rate": 6.414472480296418e-08,
|
|
"loss": 2.0079,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 9.91,
|
|
"learning_rate": 5.0826100780526633e-08,
|
|
"loss": 2.0986,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 9.93,
|
|
"learning_rate": 3.905418315063858e-08,
|
|
"loss": 2.0322,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 9.96,
|
|
"learning_rate": 2.8829701705010425e-08,
|
|
"loss": 2.0425,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 9.98,
|
|
"learning_rate": 2.0153290303134843e-08,
|
|
"loss": 2.0256,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 9.98,
|
|
"step": 3990,
|
|
"total_flos": 2.242568135280427e+18,
|
|
"train_loss": 2.1042114530290874,
|
|
"train_runtime": 6364.4425,
|
|
"train_samples_per_second": 80.37,
|
|
"train_steps_per_second": 0.627
|
|
}
|
|
],
|
|
"max_steps": 3990,
|
|
"num_train_epochs": 10,
|
|
"total_flos": 2.242568135280427e+18,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|