{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.7619,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.999843759868819e-05,
      "loss": 1.4356,
      "step": 20
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.994377405526308e-05,
      "loss": 0.8933,
      "step": 30
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.981118562438414e-05,
      "loss": 0.3785,
      "step": 40
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.960108651516545e-05,
      "loss": 0.1976,
      "step": 50
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.93141330817499e-05,
      "loss": 0.119,
      "step": 60
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.895122177284465e-05,
      "loss": 0.0819,
      "step": 70
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.851348633119606e-05,
      "loss": 0.0655,
      "step": 80
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.800229425175294e-05,
      "loss": 0.0579,
      "step": 90
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.741924250958289e-05,
      "loss": 0.0531,
      "step": 100
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.676615257088776e-05,
      "loss": 0.0491,
      "step": 110
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.04749821871519089,
      "eval_runtime": 8.519,
      "eval_samples_per_second": 435.733,
      "eval_steps_per_second": 13.617,
      "step": 116
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.604506470270403e-05,
      "loss": 0.0466,
      "step": 120
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.525823159906459e-05,
      "loss": 0.0418,
      "step": 130
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.440811134353412e-05,
      "loss": 0.0407,
      "step": 140
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.349735973010305e-05,
      "loss": 0.0404,
      "step": 150
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.252882196642992e-05,
      "loss": 0.0402,
      "step": 160
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.150552378535137e-05,
      "loss": 0.0381,
      "step": 170
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.043066199242762e-05,
      "loss": 0.038,
      "step": 180
    },
    {
      "epoch": 1.64,
      "learning_rate": 3.930759447905298e-05,
      "loss": 0.0357,
      "step": 190
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.813982973233083e-05,
      "loss": 0.0345,
      "step": 200
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.693101587448436e-05,
      "loss": 0.0329,
      "step": 210
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.568492926604412e-05,
      "loss": 0.0323,
      "step": 220
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.440546270841639e-05,
      "loss": 0.0344,
      "step": 230
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.03341960161924362,
      "eval_runtime": 8.519,
      "eval_samples_per_second": 435.734,
      "eval_steps_per_second": 13.617,
      "step": 232
    },
    {
      "epoch": 2.07,
      "learning_rate": 3.309661328268776e-05,
      "loss": 0.0339,
      "step": 240
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.176246986265767e-05,
      "loss": 0.0323,
      "step": 250
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.0407200341108617e-05,
      "loss": 0.0332,
      "step": 260
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.9035038609219306e-05,
      "loss": 0.0325,
      "step": 270
    },
    {
      "epoch": 2.41,
      "learning_rate": 2.7650271329797427e-05,
      "loss": 0.0284,
      "step": 280
    },
    {
      "epoch": 2.5,
      "learning_rate": 2.6257224545652688e-05,
      "loss": 0.0304,
      "step": 290
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.4860250164945876e-05,
      "loss": 0.0287,
      "step": 300
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.346371236573409e-05,
      "loss": 0.0303,
      "step": 310
    },
    {
      "epoch": 2.76,
      "learning_rate": 2.2071973962184384e-05,
      "loss": 0.0297,
      "step": 320
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.0689382775048418e-05,
      "loss": 0.0291,
      "step": 330
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.9320258048976702e-05,
      "loss": 0.0272,
      "step": 340
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.029604924842715263,
      "eval_runtime": 8.5231,
      "eval_samples_per_second": 435.525,
      "eval_steps_per_second": 13.61,
      "step": 348
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.796887695910535e-05,
      "loss": 0.0269,
      "step": 350
    },
    {
      "epoch": 3.1,
      "learning_rate": 1.6639461249068726e-05,
      "loss": 0.0299,
      "step": 360
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.5336164042181494e-05,
      "loss": 0.0277,
      "step": 370
    },
    {
      "epoch": 3.28,
      "learning_rate": 1.4063056866991826e-05,
      "loss": 0.0262,
      "step": 380
    },
    {
      "epoch": 3.36,
      "learning_rate": 1.2824116937738579e-05,
      "loss": 0.0272,
      "step": 390
    },
    {
      "epoch": 3.45,
      "learning_rate": 1.1623214729448317e-05,
      "loss": 0.0267,
      "step": 400
    },
    {
      "epoch": 3.53,
      "learning_rate": 1.0464101886487958e-05,
      "loss": 0.0287,
      "step": 410
    },
    {
      "epoch": 3.62,
      "learning_rate": 9.35039950234696e-06,
      "loss": 0.0259,
      "step": 420
    },
    {
      "epoch": 3.71,
      "learning_rate": 8.285586807263254e-06,
      "loss": 0.0284,
      "step": 430
    },
    {
      "epoch": 3.79,
      "learning_rate": 7.272990299033045e-06,
      "loss": 0.0268,
      "step": 440
    },
    {
      "epoch": 3.88,
      "learning_rate": 6.4089163408243555e-06,
      "loss": 0.0258,
      "step": 450
    },
    {
      "epoch": 3.97,
      "learning_rate": 5.504102886405624e-06,
      "loss": 0.0265,
      "step": 460
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.02763618901371956,
      "eval_runtime": 8.9353,
      "eval_samples_per_second": 415.429,
      "eval_steps_per_second": 12.982,
      "step": 464
    },
    {
      "epoch": 4.05,
      "learning_rate": 4.660195034524128e-06,
      "loss": 0.0262,
      "step": 470
    },
    {
      "epoch": 4.14,
      "learning_rate": 3.879829171487476e-06,
      "loss": 0.0258,
      "step": 480
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.1654431770613837e-06,
      "loss": 0.0275,
      "step": 490
    },
    {
      "epoch": 4.31,
      "learning_rate": 2.519268808480779e-06,
      "loss": 0.0266,
      "step": 500
    },
    {
      "epoch": 4.4,
      "learning_rate": 1.997705748247067e-06,
      "loss": 0.0254,
      "step": 510
    },
    {
      "epoch": 4.48,
      "learning_rate": 1.486513668803946e-06,
      "loss": 0.0282,
      "step": 520
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.0487782271553504e-06,
      "loss": 0.0244,
      "step": 530
    },
    {
      "epoch": 4.66,
      "learning_rate": 6.858669182500971e-07,
      "loss": 0.0247,
      "step": 540
    },
    {
      "epoch": 4.74,
      "learning_rate": 3.98913484834551e-07,
      "loss": 0.0277,
      "step": 550
    },
    {
      "epoch": 4.83,
      "learning_rate": 1.8881437561586722e-07,
      "loss": 0.0267,
      "step": 560
    },
    {
      "epoch": 4.91,
      "learning_rate": 5.622594473692067e-08,
      "loss": 0.024,
      "step": 570
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.5624013118137326e-09,
      "loss": 0.0258,
      "step": 580
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.027217445895075798,
      "eval_runtime": 8.5071,
      "eval_samples_per_second": 436.342,
      "eval_steps_per_second": 13.636,
      "step": 580
    },
    {
      "epoch": 5.0,
      "step": 580,
      "total_flos": 5.106947651262218e+17,
      "train_loss": 0.112489017330367,
      "train_runtime": 655.275,
      "train_samples_per_second": 113.288,
      "train_steps_per_second": 0.885
    }
  ],
  "max_steps": 580,
  "num_train_epochs": 5,
  "total_flos": 5.106947651262218e+17,
  "trial_name": null,
  "trial_params": null
}