finetuned_models/llama2_pinglun_30e/trainer_state.json

1292 lines
27 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 29.868995633187772,
"global_step": 1710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 7.692307692307694e-06,
"loss": 2.8947,
"step": 10
},
{
"epoch": 0.35,
"learning_rate": 1.6346153846153847e-05,
"loss": 2.8303,
"step": 20
},
{
"epoch": 0.52,
"learning_rate": 2.5e-05,
"loss": 2.5217,
"step": 30
},
{
"epoch": 0.7,
"learning_rate": 3.461538461538462e-05,
"loss": 1.9402,
"step": 40
},
{
"epoch": 0.87,
"learning_rate": 4.423076923076923e-05,
"loss": 1.1109,
"step": 50
},
{
"epoch": 1.0,
"eval_loss": 0.34536704421043396,
"eval_runtime": 6.1276,
"eval_samples_per_second": 298.65,
"eval_steps_per_second": 9.465,
"step": 57
},
{
"epoch": 1.05,
"learning_rate": 4.9999281943513655e-05,
"loss": 0.4664,
"step": 60
},
{
"epoch": 1.22,
"learning_rate": 4.999120428174692e-05,
"loss": 0.1137,
"step": 70
},
{
"epoch": 1.4,
"learning_rate": 4.9974154297308965e-05,
"loss": 0.0511,
"step": 80
},
{
"epoch": 1.57,
"learning_rate": 4.994813811147192e-05,
"loss": 0.0474,
"step": 90
},
{
"epoch": 1.75,
"learning_rate": 4.991316506454652e-05,
"loss": 0.0437,
"step": 100
},
{
"epoch": 1.92,
"learning_rate": 4.98692477125288e-05,
"loss": 0.0442,
"step": 110
},
{
"epoch": 1.99,
"eval_loss": 0.03746458888053894,
"eval_runtime": 6.1667,
"eval_samples_per_second": 296.756,
"eval_steps_per_second": 9.405,
"step": 114
},
{
"epoch": 2.1,
"learning_rate": 4.981640182259224e-05,
"loss": 0.0354,
"step": 120
},
{
"epoch": 2.27,
"learning_rate": 4.975464636742702e-05,
"loss": 0.0359,
"step": 130
},
{
"epoch": 2.45,
"learning_rate": 4.96840035184285e-05,
"loss": 0.0326,
"step": 140
},
{
"epoch": 2.62,
"learning_rate": 4.960449863773723e-05,
"loss": 0.0328,
"step": 150
},
{
"epoch": 2.79,
"learning_rate": 4.951616026913348e-05,
"loss": 0.0319,
"step": 160
},
{
"epoch": 2.97,
"learning_rate": 4.941902012778944e-05,
"loss": 0.03,
"step": 170
},
{
"epoch": 2.99,
"eval_loss": 0.030352076515555382,
"eval_runtime": 6.1638,
"eval_samples_per_second": 296.894,
"eval_steps_per_second": 9.41,
"step": 171
},
{
"epoch": 3.14,
"learning_rate": 4.931311308888291e-05,
"loss": 0.0282,
"step": 180
},
{
"epoch": 3.32,
"learning_rate": 4.9198477175076395e-05,
"loss": 0.0303,
"step": 190
},
{
"epoch": 3.49,
"learning_rate": 4.907515354286628e-05,
"loss": 0.0255,
"step": 200
},
{
"epoch": 3.67,
"learning_rate": 4.8943186467806814e-05,
"loss": 0.0266,
"step": 210
},
{
"epoch": 3.84,
"learning_rate": 4.880262332861437e-05,
"loss": 0.0287,
"step": 220
},
{
"epoch": 4.0,
"eval_loss": 0.026154760271310806,
"eval_runtime": 6.1374,
"eval_samples_per_second": 298.172,
"eval_steps_per_second": 9.45,
"step": 229
},
{
"epoch": 4.02,
"learning_rate": 4.865351459015756e-05,
"loss": 0.0255,
"step": 230
},
{
"epoch": 4.19,
"learning_rate": 4.849591378533938e-05,
"loss": 0.0247,
"step": 240
},
{
"epoch": 4.37,
"learning_rate": 4.832987749587785e-05,
"loss": 0.0289,
"step": 250
},
{
"epoch": 4.54,
"learning_rate": 4.815546533199215e-05,
"loss": 0.0269,
"step": 260
},
{
"epoch": 4.72,
"learning_rate": 4.797273991100133e-05,
"loss": 0.0207,
"step": 270
},
{
"epoch": 4.89,
"learning_rate": 4.7781766834843524e-05,
"loss": 0.0266,
"step": 280
},
{
"epoch": 5.0,
"eval_loss": 0.028942091390490532,
"eval_runtime": 6.1327,
"eval_samples_per_second": 298.399,
"eval_steps_per_second": 9.457,
"step": 286
},
{
"epoch": 5.07,
"learning_rate": 4.7582614666523605e-05,
"loss": 0.0246,
"step": 290
},
{
"epoch": 5.24,
"learning_rate": 4.7375354905497724e-05,
"loss": 0.0243,
"step": 300
},
{
"epoch": 5.41,
"learning_rate": 4.7160061962003666e-05,
"loss": 0.0244,
"step": 310
},
{
"epoch": 5.59,
"learning_rate": 4.693681313034608e-05,
"loss": 0.0241,
"step": 320
},
{
"epoch": 5.76,
"learning_rate": 4.670568856114641e-05,
"loss": 0.0219,
"step": 330
},
{
"epoch": 5.94,
"learning_rate": 4.646677123256724e-05,
"loss": 0.0203,
"step": 340
},
{
"epoch": 5.99,
"eval_loss": 0.019752835854887962,
"eval_runtime": 6.1301,
"eval_samples_per_second": 298.526,
"eval_steps_per_second": 9.461,
"step": 343
},
{
"epoch": 6.11,
"learning_rate": 4.6220146920521554e-05,
"loss": 0.0169,
"step": 350
},
{
"epoch": 6.29,
"learning_rate": 4.596590416787753e-05,
"loss": 0.0168,
"step": 360
},
{
"epoch": 6.46,
"learning_rate": 4.5704134252669936e-05,
"loss": 0.0175,
"step": 370
},
{
"epoch": 6.64,
"learning_rate": 4.5434931155329585e-05,
"loss": 0.0169,
"step": 380
},
{
"epoch": 6.81,
"learning_rate": 4.515839152494254e-05,
"loss": 0.0156,
"step": 390
},
{
"epoch": 6.99,
"learning_rate": 4.487461464455125e-05,
"loss": 0.0135,
"step": 400
},
{
"epoch": 6.99,
"eval_loss": 0.016346033662557602,
"eval_runtime": 6.129,
"eval_samples_per_second": 298.578,
"eval_steps_per_second": 9.463,
"step": 400
},
{
"epoch": 7.16,
"learning_rate": 4.4583702395509977e-05,
"loss": 0.0174,
"step": 410
},
{
"epoch": 7.34,
"learning_rate": 4.428575922090751e-05,
"loss": 0.0162,
"step": 420
},
{
"epoch": 7.51,
"learning_rate": 4.40116872793648e-05,
"loss": 0.0164,
"step": 430
},
{
"epoch": 7.69,
"learning_rate": 4.370068209652951e-05,
"loss": 0.0154,
"step": 440
},
{
"epoch": 7.86,
"learning_rate": 4.3382963009392125e-05,
"loss": 0.0127,
"step": 450
},
{
"epoch": 8.0,
"eval_loss": 0.01459033228456974,
"eval_runtime": 6.1422,
"eval_samples_per_second": 297.938,
"eval_steps_per_second": 9.443,
"step": 458
},
{
"epoch": 8.03,
"learning_rate": 4.3058644085211516e-05,
"loss": 0.0118,
"step": 460
},
{
"epoch": 8.21,
"learning_rate": 4.276121037944419e-05,
"loss": 0.0142,
"step": 470
},
{
"epoch": 8.38,
"learning_rate": 4.242467447275765e-05,
"loss": 0.0089,
"step": 480
},
{
"epoch": 8.56,
"learning_rate": 4.208188277303098e-05,
"loss": 0.0148,
"step": 490
},
{
"epoch": 8.73,
"learning_rate": 4.173295834907286e-05,
"loss": 0.01,
"step": 500
},
{
"epoch": 8.91,
"learning_rate": 4.137802647145788e-05,
"loss": 0.0104,
"step": 510
},
{
"epoch": 9.0,
"eval_loss": 0.012057718820869923,
"eval_runtime": 6.1315,
"eval_samples_per_second": 298.458,
"eval_steps_per_second": 9.459,
"step": 515
},
{
"epoch": 9.08,
"learning_rate": 4.101721456755193e-05,
"loss": 0.0097,
"step": 520
},
{
"epoch": 9.26,
"learning_rate": 4.065065217576336e-05,
"loss": 0.0105,
"step": 530
},
{
"epoch": 9.43,
"learning_rate": 4.0315938083948116e-05,
"loss": 0.0088,
"step": 540
},
{
"epoch": 9.61,
"learning_rate": 3.993881399956962e-05,
"loss": 0.0103,
"step": 550
},
{
"epoch": 9.78,
"learning_rate": 3.955632659383943e-05,
"loss": 0.0108,
"step": 560
},
{
"epoch": 9.96,
"learning_rate": 3.9168613187087615e-05,
"loss": 0.0075,
"step": 570
},
{
"epoch": 9.99,
"eval_loss": 0.009090474806725979,
"eval_runtime": 6.1329,
"eval_samples_per_second": 298.39,
"eval_steps_per_second": 9.457,
"step": 572
},
{
"epoch": 10.13,
"learning_rate": 3.8775812975879135e-05,
"loss": 0.0079,
"step": 580
},
{
"epoch": 10.31,
"learning_rate": 3.8378066983039454e-05,
"loss": 0.0069,
"step": 590
},
{
"epoch": 10.48,
"learning_rate": 3.7975518007024754e-05,
"loss": 0.0087,
"step": 600
},
{
"epoch": 10.66,
"learning_rate": 3.756831057065445e-05,
"loss": 0.0067,
"step": 610
},
{
"epoch": 10.83,
"learning_rate": 3.715659086922478e-05,
"loss": 0.0063,
"step": 620
},
{
"epoch": 10.99,
"eval_loss": 0.0082984184846282,
"eval_runtime": 6.1318,
"eval_samples_per_second": 298.442,
"eval_steps_per_second": 9.459,
"step": 629
},
{
"epoch": 11.0,
"learning_rate": 3.674050671802187e-05,
"loss": 0.007,
"step": 630
},
{
"epoch": 11.18,
"learning_rate": 3.632020749925317e-05,
"loss": 0.0078,
"step": 640
},
{
"epoch": 11.35,
"learning_rate": 3.5895844108416446e-05,
"loss": 0.0043,
"step": 650
},
{
"epoch": 11.53,
"learning_rate": 3.551056808136445e-05,
"loss": 0.0052,
"step": 660
},
{
"epoch": 11.7,
"learning_rate": 3.5078903662448587e-05,
"loss": 0.0061,
"step": 670
},
{
"epoch": 11.88,
"learning_rate": 3.464362072340011e-05,
"loss": 0.0078,
"step": 680
},
{
"epoch": 12.0,
"eval_loss": 0.006469315849244595,
"eval_runtime": 6.1408,
"eval_samples_per_second": 298.009,
"eval_steps_per_second": 9.445,
"step": 687
},
{
"epoch": 12.05,
"learning_rate": 3.420487553916034e-05,
"loss": 0.0048,
"step": 690
},
{
"epoch": 12.23,
"learning_rate": 3.376282562768315e-05,
"loss": 0.0049,
"step": 700
},
{
"epoch": 12.4,
"learning_rate": 3.3317629693383014e-05,
"loss": 0.0076,
"step": 710
},
{
"epoch": 12.58,
"learning_rate": 3.286944757015708e-05,
"loss": 0.0046,
"step": 720
},
{
"epoch": 12.75,
"learning_rate": 3.241844016400168e-05,
"loss": 0.0042,
"step": 730
},
{
"epoch": 12.93,
"learning_rate": 3.1964769395244063e-05,
"loss": 0.0044,
"step": 740
},
{
"epoch": 13.0,
"eval_loss": 0.0038325104396790266,
"eval_runtime": 6.14,
"eval_samples_per_second": 298.046,
"eval_steps_per_second": 9.446,
"step": 744
},
{
"epoch": 13.1,
"learning_rate": 3.1508598140409826e-05,
"loss": 0.0032,
"step": 750
},
{
"epoch": 13.28,
"learning_rate": 3.105009017374711e-05,
"loss": 0.0027,
"step": 760
},
{
"epoch": 13.45,
"learning_rate": 3.058941010842852e-05,
"loss": 0.0031,
"step": 770
},
{
"epoch": 13.62,
"learning_rate": 3.0173077587446773e-05,
"loss": 0.0033,
"step": 780
},
{
"epoch": 13.8,
"learning_rate": 2.970872678907062e-05,
"loss": 0.0042,
"step": 790
},
{
"epoch": 13.97,
"learning_rate": 2.9242685467274866e-05,
"loss": 0.0018,
"step": 800
},
{
"epoch": 13.99,
"eval_loss": 0.0032668341882526875,
"eval_runtime": 6.1454,
"eval_samples_per_second": 297.784,
"eval_steps_per_second": 9.438,
"step": 801
},
{
"epoch": 14.15,
"learning_rate": 2.8775120939851414e-05,
"loss": 0.0018,
"step": 810
},
{
"epoch": 14.32,
"learning_rate": 2.8306201071452267e-05,
"loss": 0.0028,
"step": 820
},
{
"epoch": 14.5,
"learning_rate": 2.7836094213322866e-05,
"loss": 0.0029,
"step": 830
},
{
"epoch": 14.67,
"learning_rate": 2.7364969142860802e-05,
"loss": 0.0031,
"step": 840
},
{
"epoch": 14.85,
"learning_rate": 2.689299500302145e-05,
"loss": 0.0027,
"step": 850
},
{
"epoch": 14.99,
"eval_loss": 0.0029150343034416437,
"eval_runtime": 6.138,
"eval_samples_per_second": 298.143,
"eval_steps_per_second": 9.449,
"step": 858
},
{
"epoch": 15.02,
"learning_rate": 2.64203412415924e-05,
"loss": 0.0023,
"step": 860
},
{
"epoch": 15.2,
"learning_rate": 2.5994512026047303e-05,
"loss": 0.0039,
"step": 870
},
{
"epoch": 15.37,
"learning_rate": 2.5521034637036124e-05,
"loss": 0.0031,
"step": 880
},
{
"epoch": 15.55,
"learning_rate": 2.50473701865759e-05,
"loss": 0.0015,
"step": 890
},
{
"epoch": 15.72,
"learning_rate": 2.457368872930823e-05,
"loss": 0.0015,
"step": 900
},
{
"epoch": 15.9,
"learning_rate": 2.4100160325980505e-05,
"loss": 0.0008,
"step": 910
},
{
"epoch": 16.0,
"eval_loss": 0.0030551706440746784,
"eval_runtime": 6.1377,
"eval_samples_per_second": 298.155,
"eval_steps_per_second": 9.45,
"step": 916
},
{
"epoch": 16.07,
"learning_rate": 2.3626954982390774e-05,
"loss": 0.0023,
"step": 920
},
{
"epoch": 16.24,
"learning_rate": 2.3154242588352474e-05,
"loss": 0.0016,
"step": 930
},
{
"epoch": 16.42,
"learning_rate": 2.2682192856700628e-05,
"loss": 0.003,
"step": 940
},
{
"epoch": 16.59,
"learning_rate": 2.2210975262361784e-05,
"loss": 0.0028,
"step": 950
},
{
"epoch": 16.77,
"learning_rate": 2.1740758981509147e-05,
"loss": 0.0026,
"step": 960
},
{
"epoch": 16.94,
"learning_rate": 2.1271712830825163e-05,
"loss": 0.0016,
"step": 970
},
{
"epoch": 17.0,
"eval_loss": 0.001880593947134912,
"eval_runtime": 6.1323,
"eval_samples_per_second": 298.418,
"eval_steps_per_second": 9.458,
"step": 973
},
{
"epoch": 17.12,
"learning_rate": 2.0804005206893072e-05,
"loss": 0.0012,
"step": 980
},
{
"epoch": 17.29,
"learning_rate": 2.033780402573924e-05,
"loss": 0.0012,
"step": 990
},
{
"epoch": 17.47,
"learning_rate": 1.987327666254816e-05,
"loss": 0.0018,
"step": 1000
},
{
"epoch": 17.64,
"learning_rate": 1.9456771002335782e-05,
"loss": 0.0019,
"step": 1010
},
{
"epoch": 17.82,
"learning_rate": 1.8995882811412867e-05,
"loss": 0.0012,
"step": 1020
},
{
"epoch": 17.99,
"learning_rate": 1.8537150213960525e-05,
"loss": 0.0016,
"step": 1030
},
{
"epoch": 17.99,
"eval_loss": 0.0017018432263284922,
"eval_runtime": 6.1332,
"eval_samples_per_second": 298.376,
"eval_steps_per_second": 9.457,
"step": 1030
},
{
"epoch": 18.17,
"learning_rate": 1.8080737903798157e-05,
"loss": 0.0011,
"step": 1040
},
{
"epoch": 18.34,
"learning_rate": 1.762680974171741e-05,
"loss": 0.0011,
"step": 1050
},
{
"epoch": 18.52,
"learning_rate": 1.717552869665302e-05,
"loss": 0.0024,
"step": 1060
},
{
"epoch": 18.69,
"learning_rate": 1.6727056787173845e-05,
"loss": 0.0008,
"step": 1070
},
{
"epoch": 18.86,
"learning_rate": 1.6281555023315087e-05,
"loss": 0.0008,
"step": 1080
},
{
"epoch": 18.99,
"eval_loss": 0.0015651291469112039,
"eval_runtime": 6.1247,
"eval_samples_per_second": 298.791,
"eval_steps_per_second": 9.47,
"step": 1087
},
{
"epoch": 19.04,
"learning_rate": 1.583918334877255e-05,
"loss": 0.0003,
"step": 1090
},
{
"epoch": 19.21,
"learning_rate": 1.5400100583479857e-05,
"loss": 0.0015,
"step": 1100
},
{
"epoch": 19.39,
"learning_rate": 1.4964464366588948e-05,
"loss": 0.0015,
"step": 1110
},
{
"epoch": 19.56,
"learning_rate": 1.4532431099874688e-05,
"loss": 0.0006,
"step": 1120
},
{
"epoch": 19.74,
"learning_rate": 1.410415589158356e-05,
"loss": 0.0012,
"step": 1130
},
{
"epoch": 19.91,
"learning_rate": 1.372204845468198e-05,
"loss": 0.0007,
"step": 1140
},
{
"epoch": 20.0,
"eval_loss": 0.001444431603886187,
"eval_runtime": 6.1378,
"eval_samples_per_second": 298.154,
"eval_steps_per_second": 9.45,
"step": 1145
},
{
"epoch": 20.09,
"learning_rate": 1.3301336003907328e-05,
"loss": 0.0006,
"step": 1150
},
{
"epoch": 20.26,
"learning_rate": 1.2926281563039088e-05,
"loss": 0.0013,
"step": 1160
},
{
"epoch": 20.44,
"learning_rate": 1.2513677095522591e-05,
"loss": 0.0008,
"step": 1170
},
{
"epoch": 20.61,
"learning_rate": 1.2105555457917487e-05,
"loss": 0.0024,
"step": 1180
},
{
"epoch": 20.79,
"learning_rate": 1.1702063173735825e-05,
"loss": 0.0014,
"step": 1190
},
{
"epoch": 20.96,
"learning_rate": 1.13033451044628e-05,
"loss": 0.0011,
"step": 1200
},
{
"epoch": 21.0,
"eval_loss": 0.0011821477673947811,
"eval_runtime": 6.133,
"eval_samples_per_second": 298.387,
"eval_steps_per_second": 9.457,
"step": 1202
},
{
"epoch": 21.14,
"learning_rate": 1.0909544397548691e-05,
"loss": 0.0017,
"step": 1210
},
{
"epoch": 21.31,
"learning_rate": 1.052080243501618e-05,
"loss": 0.0005,
"step": 1220
},
{
"epoch": 21.48,
"learning_rate": 1.0175375284683295e-05,
"loss": 0.001,
"step": 1230
},
{
"epoch": 21.66,
"learning_rate": 9.796627898887788e-06,
"loss": 0.0015,
"step": 1240
},
{
"epoch": 21.83,
"learning_rate": 9.423338815887287e-06,
"loss": 0.0006,
"step": 1250
},
{
"epoch": 21.99,
"eval_loss": 0.0008947821916081011,
"eval_runtime": 6.1346,
"eval_samples_per_second": 298.309,
"eval_steps_per_second": 9.455,
"step": 1259
},
{
"epoch": 22.01,
"learning_rate": 9.05564205363727e-06,
"loss": 0.0014,
"step": 1260
},
{
"epoch": 22.18,
"learning_rate": 8.693669622342535e-06,
"loss": 0.0008,
"step": 1270
},
{
"epoch": 22.36,
"learning_rate": 8.337551477063102e-06,
"loss": 0.0009,
"step": 1280
},
{
"epoch": 22.53,
"learning_rate": 7.987415471057736e-06,
"loss": 0.0007,
"step": 1290
},
{
"epoch": 22.71,
"learning_rate": 7.643387309882255e-06,
"loss": 0.0017,
"step": 1300
},
{
"epoch": 22.88,
"learning_rate": 7.305590506258805e-06,
"loss": 0.001,
"step": 1310
},
{
"epoch": 22.99,
"eval_loss": 0.0010403270134702325,
"eval_runtime": 6.1358,
"eval_samples_per_second": 298.25,
"eval_steps_per_second": 9.453,
"step": 1316
},
{
"epoch": 23.06,
"learning_rate": 6.974146335732354e-06,
"loss": 0.0014,
"step": 1320
},
{
"epoch": 23.23,
"learning_rate": 6.6491737931305506e-06,
"loss": 0.0012,
"step": 1330
},
{
"epoch": 23.41,
"learning_rate": 6.330789549842172e-06,
"loss": 0.0014,
"step": 1340
},
{
"epoch": 23.58,
"learning_rate": 6.049971250293967e-06,
"loss": 0.001,
"step": 1350
},
{
"epoch": 23.76,
"learning_rate": 5.744417700878024e-06,
"loss": 0.0011,
"step": 1360
},
{
"epoch": 23.93,
"learning_rate": 5.445777275602179e-06,
"loss": 0.0007,
"step": 1370
},
{
"epoch": 24.0,
"eval_loss": 0.001160959480330348,
"eval_runtime": 6.1297,
"eval_samples_per_second": 298.546,
"eval_steps_per_second": 9.462,
"step": 1374
},
{
"epoch": 24.1,
"learning_rate": 5.1830002812897545e-06,
"loss": 0.002,
"step": 1380
},
{
"epoch": 24.28,
"learning_rate": 4.897788094152034e-06,
"loss": 0.0016,
"step": 1390
},
{
"epoch": 24.45,
"learning_rate": 4.619792987455537e-06,
"loss": 0.0011,
"step": 1400
},
{
"epoch": 24.63,
"learning_rate": 4.349114766786669e-06,
"loss": 0.0004,
"step": 1410
},
{
"epoch": 24.8,
"learning_rate": 4.08585061082912e-06,
"loss": 0.0012,
"step": 1420
},
{
"epoch": 24.98,
"learning_rate": 3.855330069555721e-06,
"loss": 0.0009,
"step": 1430
},
{
"epoch": 25.0,
"eval_loss": 0.0008191853994503617,
"eval_runtime": 6.1465,
"eval_samples_per_second": 297.731,
"eval_steps_per_second": 9.436,
"step": 1431
},
{
"epoch": 25.15,
"learning_rate": 3.6064108032558025e-06,
"loss": 0.0008,
"step": 1440
},
{
"epoch": 25.33,
"learning_rate": 3.3651722466649716e-06,
"loss": 0.0004,
"step": 1450
},
{
"epoch": 25.5,
"learning_rate": 3.131701009061683e-06,
"loss": 0.0013,
"step": 1460
},
{
"epoch": 25.68,
"learning_rate": 2.906080911107578e-06,
"loss": 0.0012,
"step": 1470
},
{
"epoch": 25.85,
"learning_rate": 2.6883929547542735e-06,
"loss": 0.0007,
"step": 1480
},
{
"epoch": 25.99,
"eval_loss": 0.0007047198596410453,
"eval_runtime": 6.1524,
"eval_samples_per_second": 297.444,
"eval_steps_per_second": 9.427,
"step": 1488
},
{
"epoch": 26.03,
"learning_rate": 2.4787152941620843e-06,
"loss": 0.0005,
"step": 1490
},
{
"epoch": 26.2,
"learning_rate": 2.277123207641199e-06,
"loss": 0.0011,
"step": 1500
},
{
"epoch": 26.38,
"learning_rate": 2.0836890706253026e-06,
"loss": 0.0005,
"step": 1510
},
{
"epoch": 26.55,
"learning_rate": 1.8984823296874095e-06,
"loss": 0.0013,
"step": 1520
},
{
"epoch": 26.72,
"learning_rate": 1.7215694776072128e-06,
"loss": 0.0005,
"step": 1530
},
{
"epoch": 26.9,
"learning_rate": 1.5530140294988977e-06,
"loss": 0.0015,
"step": 1540
},
{
"epoch": 26.99,
"eval_loss": 0.0006515153800137341,
"eval_runtime": 6.1481,
"eval_samples_per_second": 297.654,
"eval_steps_per_second": 9.434,
"step": 1545
},
{
"epoch": 27.07,
"learning_rate": 1.3928765000080001e-06,
"loss": 0.001,
"step": 1550
},
{
"epoch": 27.25,
"learning_rate": 1.2412143815854538e-06,
"loss": 0.0014,
"step": 1560
},
{
"epoch": 27.42,
"learning_rate": 1.0980821238467553e-06,
"loss": 0.0004,
"step": 1570
},
{
"epoch": 27.6,
"learning_rate": 9.635311140234388e-07,
"loss": 0.0004,
"step": 1580
},
{
"epoch": 27.77,
"learning_rate": 8.376096585141213e-07,
"loss": 0.0009,
"step": 1590
},
{
"epoch": 27.95,
"learning_rate": 7.203629655415628e-07,
"loss": 0.0011,
"step": 1600
},
{
"epoch": 28.0,
"eval_loss": 0.0006537799490615726,
"eval_runtime": 6.1314,
"eval_samples_per_second": 298.462,
"eval_steps_per_second": 9.459,
"step": 1603
},
{
"epoch": 28.12,
"learning_rate": 6.118331289220291e-07,
"loss": 0.0009,
"step": 1610
},
{
"epoch": 28.3,
"learning_rate": 5.216414570304861e-07,
"loss": 0.0009,
"step": 1620
},
{
"epoch": 28.47,
"learning_rate": 4.2977839628236815e-07,
"loss": 0.001,
"step": 1630
},
{
"epoch": 28.65,
"learning_rate": 3.467365173327158e-07,
"loss": 0.0007,
"step": 1640
},
{
"epoch": 28.82,
"learning_rate": 2.725456338121435e-07,
"loss": 0.0006,
"step": 1650
},
{
"epoch": 29.0,
"learning_rate": 2.07232381673797e-07,
"loss": 0.0008,
"step": 1660
},
{
"epoch": 29.0,
"eval_loss": 0.0006117070442996919,
"eval_runtime": 6.1377,
"eval_samples_per_second": 298.16,
"eval_steps_per_second": 9.45,
"step": 1660
},
{
"epoch": 29.17,
"learning_rate": 1.5082020963052e-07,
"loss": 0.001,
"step": 1670
},
{
"epoch": 29.34,
"learning_rate": 1.0332937073632698e-07,
"loss": 0.0008,
"step": 1680
},
{
"epoch": 29.52,
"learning_rate": 6.477691511516115e-08,
"loss": 0.0008,
"step": 1690
},
{
"epoch": 29.69,
"learning_rate": 3.517668383957173e-08,
"loss": 0.0012,
"step": 1700
},
{
"epoch": 29.87,
"learning_rate": 1.453930396150549e-08,
"loss": 0.001,
"step": 1710
},
{
"epoch": 29.87,
"eval_loss": 0.000620449660345912,
"eval_runtime": 6.1306,
"eval_samples_per_second": 298.504,
"eval_steps_per_second": 9.461,
"step": 1710
},
{
"epoch": 29.87,
"step": 1710,
"total_flos": 2.098453351468368e+18,
"train_loss": 0.0774322310971826,
"train_runtime": 2689.91,
"train_samples_per_second": 81.594,
"train_steps_per_second": 0.636
}
],
"max_steps": 1710,
"num_train_epochs": 30,
"total_flos": 2.098453351468368e+18,
"trial_name": null,
"trial_params": null
}