{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 29.868995633187772,
  "global_step": 1710,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 7.692307692307694e-06,
      "loss": 2.8947,
      "step": 10
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.6346153846153847e-05,
      "loss": 2.8303,
      "step": 20
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.5e-05,
      "loss": 2.5217,
      "step": 30
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.461538461538462e-05,
      "loss": 1.9402,
      "step": 40
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.423076923076923e-05,
      "loss": 1.1109,
      "step": 50
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.34536704421043396,
      "eval_runtime": 6.1276,
      "eval_samples_per_second": 298.65,
      "eval_steps_per_second": 9.465,
      "step": 57
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.9999281943513655e-05,
      "loss": 0.4664,
      "step": 60
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.999120428174692e-05,
      "loss": 0.1137,
      "step": 70
    },
    {
      "epoch": 1.4,
      "learning_rate": 4.9974154297308965e-05,
      "loss": 0.0511,
      "step": 80
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.994813811147192e-05,
      "loss": 0.0474,
      "step": 90
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.991316506454652e-05,
      "loss": 0.0437,
      "step": 100
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.98692477125288e-05,
      "loss": 0.0442,
      "step": 110
    },
    {
      "epoch": 1.99,
      "eval_loss": 0.03746458888053894,
      "eval_runtime": 6.1667,
      "eval_samples_per_second": 296.756,
      "eval_steps_per_second": 9.405,
      "step": 114
    },
    {
      "epoch": 2.1,
      "learning_rate": 4.981640182259224e-05,
      "loss": 0.0354,
      "step": 120
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.975464636742702e-05,
      "loss": 0.0359,
      "step": 130
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.96840035184285e-05,
      "loss": 0.0326,
      "step": 140
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.960449863773723e-05,
      "loss": 0.0328,
      "step": 150
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.951616026913348e-05,
      "loss": 0.0319,
      "step": 160
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.941902012778944e-05,
      "loss": 0.03,
      "step": 170
    },
    {
      "epoch": 2.99,
      "eval_loss": 0.030352076515555382,
      "eval_runtime": 6.1638,
      "eval_samples_per_second": 296.894,
      "eval_steps_per_second": 9.41,
      "step": 171
    },
    {
      "epoch": 3.14,
      "learning_rate": 4.931311308888291e-05,
      "loss": 0.0282,
      "step": 180
    },
    {
      "epoch": 3.32,
      "learning_rate": 4.9198477175076395e-05,
      "loss": 0.0303,
      "step": 190
    },
    {
      "epoch": 3.49,
      "learning_rate": 4.907515354286628e-05,
      "loss": 0.0255,
      "step": 200
    },
    {
      "epoch": 3.67,
      "learning_rate": 4.8943186467806814e-05,
      "loss": 0.0266,
      "step": 210
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.880262332861437e-05,
      "loss": 0.0287,
      "step": 220
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.026154760271310806,
      "eval_runtime": 6.1374,
      "eval_samples_per_second": 298.172,
      "eval_steps_per_second": 9.45,
      "step": 229
    },
    {
      "epoch": 4.02,
      "learning_rate": 4.865351459015756e-05,
      "loss": 0.0255,
      "step": 230
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.849591378533938e-05,
      "loss": 0.0247,
      "step": 240
    },
    {
      "epoch": 4.37,
      "learning_rate": 4.832987749587785e-05,
      "loss": 0.0289,
      "step": 250
    },
    {
      "epoch": 4.54,
      "learning_rate": 4.815546533199215e-05,
      "loss": 0.0269,
      "step": 260
    },
    {
      "epoch": 4.72,
      "learning_rate": 4.797273991100133e-05,
      "loss": 0.0207,
      "step": 270
    },
    {
      "epoch": 4.89,
      "learning_rate": 4.7781766834843524e-05,
      "loss": 0.0266,
      "step": 280
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.028942091390490532,
      "eval_runtime": 6.1327,
      "eval_samples_per_second": 298.399,
      "eval_steps_per_second": 9.457,
      "step": 286
    },
    {
      "epoch": 5.07,
      "learning_rate": 4.7582614666523605e-05,
      "loss": 0.0246,
      "step": 290
    },
    {
      "epoch": 5.24,
      "learning_rate": 4.7375354905497724e-05,
      "loss": 0.0243,
      "step": 300
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.7160061962003666e-05,
      "loss": 0.0244,
      "step": 310
    },
    {
      "epoch": 5.59,
      "learning_rate": 4.693681313034608e-05,
      "loss": 0.0241,
      "step": 320
    },
    {
      "epoch": 5.76,
      "learning_rate": 4.670568856114641e-05,
      "loss": 0.0219,
      "step": 330
    },
    {
      "epoch": 5.94,
      "learning_rate": 4.646677123256724e-05,
      "loss": 0.0203,
      "step": 340
    },
    {
      "epoch": 5.99,
      "eval_loss": 0.019752835854887962,
      "eval_runtime": 6.1301,
      "eval_samples_per_second": 298.526,
      "eval_steps_per_second": 9.461,
      "step": 343
    },
    {
      "epoch": 6.11,
      "learning_rate": 4.6220146920521554e-05,
      "loss": 0.0169,
      "step": 350
    },
    {
      "epoch": 6.29,
      "learning_rate": 4.596590416787753e-05,
      "loss": 0.0168,
      "step": 360
    },
    {
      "epoch": 6.46,
      "learning_rate": 4.5704134252669936e-05,
      "loss": 0.0175,
      "step": 370
    },
    {
      "epoch": 6.64,
      "learning_rate": 4.5434931155329585e-05,
      "loss": 0.0169,
      "step": 380
    },
    {
      "epoch": 6.81,
      "learning_rate": 4.515839152494254e-05,
      "loss": 0.0156,
      "step": 390
    },
    {
      "epoch": 6.99,
      "learning_rate": 4.487461464455125e-05,
      "loss": 0.0135,
      "step": 400
    },
    {
      "epoch": 6.99,
      "eval_loss": 0.016346033662557602,
      "eval_runtime": 6.129,
      "eval_samples_per_second": 298.578,
      "eval_steps_per_second": 9.463,
      "step": 400
    },
    {
      "epoch": 7.16,
      "learning_rate": 4.4583702395509977e-05,
      "loss": 0.0174,
      "step": 410
    },
    {
      "epoch": 7.34,
      "learning_rate": 4.428575922090751e-05,
      "loss": 0.0162,
      "step": 420
    },
    {
      "epoch": 7.51,
      "learning_rate": 4.40116872793648e-05,
      "loss": 0.0164,
      "step": 430
    },
    {
      "epoch": 7.69,
      "learning_rate": 4.370068209652951e-05,
      "loss": 0.0154,
      "step": 440
    },
    {
      "epoch": 7.86,
      "learning_rate": 4.3382963009392125e-05,
      "loss": 0.0127,
      "step": 450
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.01459033228456974,
      "eval_runtime": 6.1422,
      "eval_samples_per_second": 297.938,
      "eval_steps_per_second": 9.443,
      "step": 458
    },
    {
      "epoch": 8.03,
      "learning_rate": 4.3058644085211516e-05,
      "loss": 0.0118,
      "step": 460
    },
    {
      "epoch": 8.21,
      "learning_rate": 4.276121037944419e-05,
      "loss": 0.0142,
      "step": 470
    },
    {
      "epoch": 8.38,
      "learning_rate": 4.242467447275765e-05,
      "loss": 0.0089,
      "step": 480
    },
    {
      "epoch": 8.56,
      "learning_rate": 4.208188277303098e-05,
      "loss": 0.0148,
      "step": 490
    },
    {
      "epoch": 8.73,
      "learning_rate": 4.173295834907286e-05,
      "loss": 0.01,
      "step": 500
    },
    {
      "epoch": 8.91,
      "learning_rate": 4.137802647145788e-05,
      "loss": 0.0104,
      "step": 510
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.012057718820869923,
      "eval_runtime": 6.1315,
      "eval_samples_per_second": 298.458,
      "eval_steps_per_second": 9.459,
      "step": 515
    },
    {
      "epoch": 9.08,
      "learning_rate": 4.101721456755193e-05,
      "loss": 0.0097,
      "step": 520
    },
    {
      "epoch": 9.26,
      "learning_rate": 4.065065217576336e-05,
      "loss": 0.0105,
      "step": 530
    },
    {
      "epoch": 9.43,
      "learning_rate": 4.0315938083948116e-05,
      "loss": 0.0088,
      "step": 540
    },
    {
      "epoch": 9.61,
      "learning_rate": 3.993881399956962e-05,
      "loss": 0.0103,
      "step": 550
    },
    {
      "epoch": 9.78,
      "learning_rate": 3.955632659383943e-05,
      "loss": 0.0108,
      "step": 560
    },
    {
      "epoch": 9.96,
      "learning_rate": 3.9168613187087615e-05,
      "loss": 0.0075,
      "step": 570
    },
    {
      "epoch": 9.99,
      "eval_loss": 0.009090474806725979,
      "eval_runtime": 6.1329,
      "eval_samples_per_second": 298.39,
      "eval_steps_per_second": 9.457,
      "step": 572
    },
    {
      "epoch": 10.13,
      "learning_rate": 3.8775812975879135e-05,
      "loss": 0.0079,
      "step": 580
    },
    {
      "epoch": 10.31,
      "learning_rate": 3.8378066983039454e-05,
      "loss": 0.0069,
      "step": 590
    },
    {
      "epoch": 10.48,
      "learning_rate": 3.7975518007024754e-05,
      "loss": 0.0087,
      "step": 600
    },
    {
      "epoch": 10.66,
      "learning_rate": 3.756831057065445e-05,
      "loss": 0.0067,
      "step": 610
    },
    {
      "epoch": 10.83,
      "learning_rate": 3.715659086922478e-05,
      "loss": 0.0063,
      "step": 620
    },
    {
      "epoch": 10.99,
      "eval_loss": 0.0082984184846282,
      "eval_runtime": 6.1318,
      "eval_samples_per_second": 298.442,
      "eval_steps_per_second": 9.459,
      "step": 629
    },
    {
      "epoch": 11.0,
      "learning_rate": 3.674050671802187e-05,
      "loss": 0.007,
      "step": 630
    },
    {
      "epoch": 11.18,
      "learning_rate": 3.632020749925317e-05,
      "loss": 0.0078,
      "step": 640
    },
    {
      "epoch": 11.35,
      "learning_rate": 3.5895844108416446e-05,
      "loss": 0.0043,
      "step": 650
    },
    {
      "epoch": 11.53,
      "learning_rate": 3.551056808136445e-05,
      "loss": 0.0052,
      "step": 660
    },
    {
      "epoch": 11.7,
      "learning_rate": 3.5078903662448587e-05,
      "loss": 0.0061,
      "step": 670
    },
    {
      "epoch": 11.88,
      "learning_rate": 3.464362072340011e-05,
      "loss": 0.0078,
      "step": 680
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.006469315849244595,
      "eval_runtime": 6.1408,
      "eval_samples_per_second": 298.009,
      "eval_steps_per_second": 9.445,
      "step": 687
    },
    {
      "epoch": 12.05,
      "learning_rate": 3.420487553916034e-05,
      "loss": 0.0048,
      "step": 690
    },
    {
      "epoch": 12.23,
      "learning_rate": 3.376282562768315e-05,
      "loss": 0.0049,
      "step": 700
    },
    {
      "epoch": 12.4,
      "learning_rate": 3.3317629693383014e-05,
      "loss": 0.0076,
      "step": 710
    },
    {
      "epoch": 12.58,
      "learning_rate": 3.286944757015708e-05,
      "loss": 0.0046,
      "step": 720
    },
    {
      "epoch": 12.75,
      "learning_rate": 3.241844016400168e-05,
      "loss": 0.0042,
      "step": 730
    },
    {
      "epoch": 12.93,
      "learning_rate": 3.1964769395244063e-05,
      "loss": 0.0044,
      "step": 740
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.0038325104396790266,
      "eval_runtime": 6.14,
      "eval_samples_per_second": 298.046,
      "eval_steps_per_second": 9.446,
      "step": 744
    },
    {
      "epoch": 13.1,
      "learning_rate": 3.1508598140409826e-05,
      "loss": 0.0032,
      "step": 750
    },
    {
      "epoch": 13.28,
      "learning_rate": 3.105009017374711e-05,
      "loss": 0.0027,
      "step": 760
    },
    {
      "epoch": 13.45,
      "learning_rate": 3.058941010842852e-05,
      "loss": 0.0031,
      "step": 770
    },
    {
      "epoch": 13.62,
      "learning_rate": 3.0173077587446773e-05,
      "loss": 0.0033,
      "step": 780
    },
    {
      "epoch": 13.8,
      "learning_rate": 2.970872678907062e-05,
      "loss": 0.0042,
      "step": 790
    },
    {
      "epoch": 13.97,
      "learning_rate": 2.9242685467274866e-05,
      "loss": 0.0018,
      "step": 800
    },
    {
      "epoch": 13.99,
      "eval_loss": 0.0032668341882526875,
      "eval_runtime": 6.1454,
      "eval_samples_per_second": 297.784,
      "eval_steps_per_second": 9.438,
      "step": 801
    },
    {
      "epoch": 14.15,
      "learning_rate": 2.8775120939851414e-05,
      "loss": 0.0018,
      "step": 810
    },
    {
      "epoch": 14.32,
      "learning_rate": 2.8306201071452267e-05,
      "loss": 0.0028,
      "step": 820
    },
    {
      "epoch": 14.5,
      "learning_rate": 2.7836094213322866e-05,
      "loss": 0.0029,
      "step": 830
    },
    {
      "epoch": 14.67,
      "learning_rate": 2.7364969142860802e-05,
      "loss": 0.0031,
      "step": 840
    },
    {
      "epoch": 14.85,
      "learning_rate": 2.689299500302145e-05,
      "loss": 0.0027,
      "step": 850
    },
    {
      "epoch": 14.99,
      "eval_loss": 0.0029150343034416437,
      "eval_runtime": 6.138,
      "eval_samples_per_second": 298.143,
      "eval_steps_per_second": 9.449,
      "step": 858
    },
    {
      "epoch": 15.02,
      "learning_rate": 2.64203412415924e-05,
      "loss": 0.0023,
      "step": 860
    },
    {
      "epoch": 15.2,
      "learning_rate": 2.5994512026047303e-05,
      "loss": 0.0039,
      "step": 870
    },
    {
      "epoch": 15.37,
      "learning_rate": 2.5521034637036124e-05,
      "loss": 0.0031,
      "step": 880
    },
    {
      "epoch": 15.55,
      "learning_rate": 2.50473701865759e-05,
      "loss": 0.0015,
      "step": 890
    },
    {
      "epoch": 15.72,
      "learning_rate": 2.457368872930823e-05,
      "loss": 0.0015,
      "step": 900
    },
    {
      "epoch": 15.9,
      "learning_rate": 2.4100160325980505e-05,
      "loss": 0.0008,
      "step": 910
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.0030551706440746784,
      "eval_runtime": 6.1377,
      "eval_samples_per_second": 298.155,
      "eval_steps_per_second": 9.45,
      "step": 916
    },
    {
      "epoch": 16.07,
      "learning_rate": 2.3626954982390774e-05,
      "loss": 0.0023,
      "step": 920
    },
    {
      "epoch": 16.24,
      "learning_rate": 2.3154242588352474e-05,
      "loss": 0.0016,
      "step": 930
    },
    {
      "epoch": 16.42,
      "learning_rate": 2.2682192856700628e-05,
      "loss": 0.003,
      "step": 940
    },
    {
      "epoch": 16.59,
      "learning_rate": 2.2210975262361784e-05,
      "loss": 0.0028,
      "step": 950
    },
    {
      "epoch": 16.77,
      "learning_rate": 2.1740758981509147e-05,
      "loss": 0.0026,
      "step": 960
    },
    {
      "epoch": 16.94,
      "learning_rate": 2.1271712830825163e-05,
      "loss": 0.0016,
      "step": 970
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.001880593947134912,
      "eval_runtime": 6.1323,
      "eval_samples_per_second": 298.418,
      "eval_steps_per_second": 9.458,
      "step": 973
    },
    {
      "epoch": 17.12,
      "learning_rate": 2.0804005206893072e-05,
      "loss": 0.0012,
      "step": 980
    },
    {
      "epoch": 17.29,
      "learning_rate": 2.033780402573924e-05,
      "loss": 0.0012,
      "step": 990
    },
    {
      "epoch": 17.47,
      "learning_rate": 1.987327666254816e-05,
      "loss": 0.0018,
      "step": 1000
    },
    {
      "epoch": 17.64,
      "learning_rate": 1.9456771002335782e-05,
      "loss": 0.0019,
      "step": 1010
    },
    {
      "epoch": 17.82,
      "learning_rate": 1.8995882811412867e-05,
      "loss": 0.0012,
      "step": 1020
    },
    {
      "epoch": 17.99,
      "learning_rate": 1.8537150213960525e-05,
      "loss": 0.0016,
      "step": 1030
    },
    {
      "epoch": 17.99,
      "eval_loss": 0.0017018432263284922,
      "eval_runtime": 6.1332,
      "eval_samples_per_second": 298.376,
      "eval_steps_per_second": 9.457,
      "step": 1030
    },
    {
      "epoch": 18.17,
      "learning_rate": 1.8080737903798157e-05,
      "loss": 0.0011,
      "step": 1040
    },
    {
      "epoch": 18.34,
      "learning_rate": 1.762680974171741e-05,
      "loss": 0.0011,
      "step": 1050
    },
    {
      "epoch": 18.52,
      "learning_rate": 1.717552869665302e-05,
      "loss": 0.0024,
      "step": 1060
    },
    {
      "epoch": 18.69,
      "learning_rate": 1.6727056787173845e-05,
      "loss": 0.0008,
      "step": 1070
    },
    {
      "epoch": 18.86,
      "learning_rate": 1.6281555023315087e-05,
      "loss": 0.0008,
      "step": 1080
    },
    {
      "epoch": 18.99,
      "eval_loss": 0.0015651291469112039,
      "eval_runtime": 6.1247,
      "eval_samples_per_second": 298.791,
      "eval_steps_per_second": 9.47,
      "step": 1087
    },
    {
      "epoch": 19.04,
      "learning_rate": 1.583918334877255e-05,
      "loss": 0.0003,
      "step": 1090
    },
    {
      "epoch": 19.21,
      "learning_rate": 1.5400100583479857e-05,
      "loss": 0.0015,
      "step": 1100
    },
    {
      "epoch": 19.39,
      "learning_rate": 1.4964464366588948e-05,
      "loss": 0.0015,
      "step": 1110
    },
    {
      "epoch": 19.56,
      "learning_rate": 1.4532431099874688e-05,
      "loss": 0.0006,
      "step": 1120
    },
    {
      "epoch": 19.74,
      "learning_rate": 1.410415589158356e-05,
      "loss": 0.0012,
      "step": 1130
    },
    {
      "epoch": 19.91,
      "learning_rate": 1.372204845468198e-05,
      "loss": 0.0007,
      "step": 1140
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.001444431603886187,
      "eval_runtime": 6.1378,
      "eval_samples_per_second": 298.154,
      "eval_steps_per_second": 9.45,
      "step": 1145
    },
    {
      "epoch": 20.09,
      "learning_rate": 1.3301336003907328e-05,
      "loss": 0.0006,
      "step": 1150
    },
    {
      "epoch": 20.26,
      "learning_rate": 1.2926281563039088e-05,
      "loss": 0.0013,
      "step": 1160
    },
    {
      "epoch": 20.44,
      "learning_rate": 1.2513677095522591e-05,
      "loss": 0.0008,
      "step": 1170
    },
    {
      "epoch": 20.61,
      "learning_rate": 1.2105555457917487e-05,
      "loss": 0.0024,
      "step": 1180
    },
    {
      "epoch": 20.79,
      "learning_rate": 1.1702063173735825e-05,
      "loss": 0.0014,
      "step": 1190
    },
    {
      "epoch": 20.96,
      "learning_rate": 1.13033451044628e-05,
      "loss": 0.0011,
      "step": 1200
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.0011821477673947811,
      "eval_runtime": 6.133,
      "eval_samples_per_second": 298.387,
      "eval_steps_per_second": 9.457,
      "step": 1202
    },
    {
      "epoch": 21.14,
      "learning_rate": 1.0909544397548691e-05,
      "loss": 0.0017,
      "step": 1210
    },
    {
      "epoch": 21.31,
      "learning_rate": 1.052080243501618e-05,
      "loss": 0.0005,
      "step": 1220
    },
    {
      "epoch": 21.48,
      "learning_rate": 1.0175375284683295e-05,
      "loss": 0.001,
      "step": 1230
    },
    {
      "epoch": 21.66,
      "learning_rate": 9.796627898887788e-06,
      "loss": 0.0015,
      "step": 1240
    },
    {
      "epoch": 21.83,
      "learning_rate": 9.423338815887287e-06,
      "loss": 0.0006,
      "step": 1250
    },
    {
      "epoch": 21.99,
      "eval_loss": 0.0008947821916081011,
      "eval_runtime": 6.1346,
      "eval_samples_per_second": 298.309,
      "eval_steps_per_second": 9.455,
      "step": 1259
    },
    {
      "epoch": 22.01,
      "learning_rate": 9.05564205363727e-06,
      "loss": 0.0014,
      "step": 1260
    },
    {
      "epoch": 22.18,
      "learning_rate": 8.693669622342535e-06,
      "loss": 0.0008,
      "step": 1270
    },
    {
      "epoch": 22.36,
      "learning_rate": 8.337551477063102e-06,
      "loss": 0.0009,
      "step": 1280
    },
    {
      "epoch": 22.53,
      "learning_rate": 7.987415471057736e-06,
      "loss": 0.0007,
      "step": 1290
    },
    {
      "epoch": 22.71,
      "learning_rate": 7.643387309882255e-06,
      "loss": 0.0017,
      "step": 1300
    },
    {
      "epoch": 22.88,
      "learning_rate": 7.305590506258805e-06,
      "loss": 0.001,
      "step": 1310
    },
    {
      "epoch": 22.99,
      "eval_loss": 0.0010403270134702325,
      "eval_runtime": 6.1358,
      "eval_samples_per_second": 298.25,
      "eval_steps_per_second": 9.453,
      "step": 1316
    },
    {
      "epoch": 23.06,
      "learning_rate": 6.974146335732354e-06,
      "loss": 0.0014,
      "step": 1320
    },
    {
      "epoch": 23.23,
      "learning_rate": 6.6491737931305506e-06,
      "loss": 0.0012,
      "step": 1330
    },
    {
      "epoch": 23.41,
      "learning_rate": 6.330789549842172e-06,
      "loss": 0.0014,
      "step": 1340
    },
    {
      "epoch": 23.58,
      "learning_rate": 6.049971250293967e-06,
      "loss": 0.001,
      "step": 1350
    },
    {
      "epoch": 23.76,
      "learning_rate": 5.744417700878024e-06,
      "loss": 0.0011,
      "step": 1360
    },
    {
      "epoch": 23.93,
      "learning_rate": 5.445777275602179e-06,
      "loss": 0.0007,
      "step": 1370
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.001160959480330348,
      "eval_runtime": 6.1297,
      "eval_samples_per_second": 298.546,
      "eval_steps_per_second": 9.462,
      "step": 1374
    },
    {
      "epoch": 24.1,
      "learning_rate": 5.1830002812897545e-06,
      "loss": 0.002,
      "step": 1380
    },
    {
      "epoch": 24.28,
      "learning_rate": 4.897788094152034e-06,
      "loss": 0.0016,
      "step": 1390
    },
    {
      "epoch": 24.45,
      "learning_rate": 4.619792987455537e-06,
      "loss": 0.0011,
      "step": 1400
    },
    {
      "epoch": 24.63,
      "learning_rate": 4.349114766786669e-06,
      "loss": 0.0004,
      "step": 1410
    },
    {
      "epoch": 24.8,
      "learning_rate": 4.08585061082912e-06,
      "loss": 0.0012,
      "step": 1420
    },
    {
      "epoch": 24.98,
      "learning_rate": 3.855330069555721e-06,
      "loss": 0.0009,
      "step": 1430
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.0008191853994503617,
      "eval_runtime": 6.1465,
      "eval_samples_per_second": 297.731,
      "eval_steps_per_second": 9.436,
      "step": 1431
    },
    {
      "epoch": 25.15,
      "learning_rate": 3.6064108032558025e-06,
      "loss": 0.0008,
      "step": 1440
    },
    {
      "epoch": 25.33,
      "learning_rate": 3.3651722466649716e-06,
      "loss": 0.0004,
      "step": 1450
    },
    {
      "epoch": 25.5,
      "learning_rate": 3.131701009061683e-06,
      "loss": 0.0013,
      "step": 1460
    },
    {
      "epoch": 25.68,
      "learning_rate": 2.906080911107578e-06,
      "loss": 0.0012,
      "step": 1470
    },
    {
      "epoch": 25.85,
      "learning_rate": 2.6883929547542735e-06,
      "loss": 0.0007,
      "step": 1480
    },
    {
      "epoch": 25.99,
      "eval_loss": 0.0007047198596410453,
      "eval_runtime": 6.1524,
      "eval_samples_per_second": 297.444,
      "eval_steps_per_second": 9.427,
      "step": 1488
    },
    {
      "epoch": 26.03,
      "learning_rate": 2.4787152941620843e-06,
      "loss": 0.0005,
      "step": 1490
    },
    {
      "epoch": 26.2,
      "learning_rate": 2.277123207641199e-06,
      "loss": 0.0011,
      "step": 1500
    },
    {
      "epoch": 26.38,
      "learning_rate": 2.0836890706253026e-06,
      "loss": 0.0005,
      "step": 1510
    },
    {
      "epoch": 26.55,
      "learning_rate": 1.8984823296874095e-06,
      "loss": 0.0013,
      "step": 1520
    },
    {
      "epoch": 26.72,
      "learning_rate": 1.7215694776072128e-06,
      "loss": 0.0005,
      "step": 1530
    },
    {
      "epoch": 26.9,
      "learning_rate": 1.5530140294988977e-06,
      "loss": 0.0015,
      "step": 1540
    },
    {
      "epoch": 26.99,
      "eval_loss": 0.0006515153800137341,
      "eval_runtime": 6.1481,
      "eval_samples_per_second": 297.654,
      "eval_steps_per_second": 9.434,
      "step": 1545
    },
    {
      "epoch": 27.07,
      "learning_rate": 1.3928765000080001e-06,
      "loss": 0.001,
      "step": 1550
    },
    {
      "epoch": 27.25,
      "learning_rate": 1.2412143815854538e-06,
      "loss": 0.0014,
      "step": 1560
    },
    {
      "epoch": 27.42,
      "learning_rate": 1.0980821238467553e-06,
      "loss": 0.0004,
      "step": 1570
    },
    {
      "epoch": 27.6,
      "learning_rate": 9.635311140234388e-07,
      "loss": 0.0004,
      "step": 1580
    },
    {
      "epoch": 27.77,
      "learning_rate": 8.376096585141213e-07,
      "loss": 0.0009,
      "step": 1590
    },
    {
      "epoch": 27.95,
      "learning_rate": 7.203629655415628e-07,
      "loss": 0.0011,
      "step": 1600
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.0006537799490615726,
      "eval_runtime": 6.1314,
      "eval_samples_per_second": 298.462,
      "eval_steps_per_second": 9.459,
      "step": 1603
    },
    {
      "epoch": 28.12,
      "learning_rate": 6.118331289220291e-07,
      "loss": 0.0009,
      "step": 1610
    },
    {
      "epoch": 28.3,
      "learning_rate": 5.216414570304861e-07,
      "loss": 0.0009,
      "step": 1620
    },
    {
      "epoch": 28.47,
      "learning_rate": 4.2977839628236815e-07,
      "loss": 0.001,
      "step": 1630
    },
    {
      "epoch": 28.65,
      "learning_rate": 3.467365173327158e-07,
      "loss": 0.0007,
      "step": 1640
    },
    {
      "epoch": 28.82,
      "learning_rate": 2.725456338121435e-07,
      "loss": 0.0006,
      "step": 1650
    },
    {
      "epoch": 29.0,
      "learning_rate": 2.07232381673797e-07,
      "loss": 0.0008,
      "step": 1660
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.0006117070442996919,
      "eval_runtime": 6.1377,
      "eval_samples_per_second": 298.16,
      "eval_steps_per_second": 9.45,
      "step": 1660
    },
    {
      "epoch": 29.17,
      "learning_rate": 1.5082020963052e-07,
      "loss": 0.001,
      "step": 1670
    },
    {
      "epoch": 29.34,
      "learning_rate": 1.0332937073632698e-07,
      "loss": 0.0008,
      "step": 1680
    },
    {
      "epoch": 29.52,
      "learning_rate": 6.477691511516115e-08,
      "loss": 0.0008,
      "step": 1690
    },
    {
      "epoch": 29.69,
      "learning_rate": 3.517668383957173e-08,
      "loss": 0.0012,
      "step": 1700
    },
    {
      "epoch": 29.87,
      "learning_rate": 1.453930396150549e-08,
      "loss": 0.001,
      "step": 1710
    },
    {
      "epoch": 29.87,
      "eval_loss": 0.000620449660345912,
      "eval_runtime": 6.1306,
      "eval_samples_per_second": 298.504,
      "eval_steps_per_second": 9.461,
      "step": 1710
    },
    {
      "epoch": 29.87,
      "step": 1710,
      "total_flos": 2.098453351468368e+18,
      "train_loss": 0.0774322310971826,
      "train_runtime": 2689.91,
      "train_samples_per_second": 81.594,
      "train_steps_per_second": 0.636
    }
  ],
  "max_steps": 1710,
  "num_train_epochs": 30,
  "total_flos": 2.098453351468368e+18,
  "trial_name": null,
  "trial_params": null
}