finetuned_models/my_atom_7b_model/trainer_state.json

414 lines
8.4 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 2.777777777777778e-05,
"loss": 1.7619,
"step": 10
},
{
"epoch": 0.17,
"learning_rate": 4.999843759868819e-05,
"loss": 1.4356,
"step": 20
},
{
"epoch": 0.26,
"learning_rate": 4.994377405526308e-05,
"loss": 0.8933,
"step": 30
},
{
"epoch": 0.34,
"learning_rate": 4.981118562438414e-05,
"loss": 0.3785,
"step": 40
},
{
"epoch": 0.43,
"learning_rate": 4.960108651516545e-05,
"loss": 0.1976,
"step": 50
},
{
"epoch": 0.52,
"learning_rate": 4.93141330817499e-05,
"loss": 0.119,
"step": 60
},
{
"epoch": 0.6,
"learning_rate": 4.895122177284465e-05,
"loss": 0.0819,
"step": 70
},
{
"epoch": 0.69,
"learning_rate": 4.851348633119606e-05,
"loss": 0.0655,
"step": 80
},
{
"epoch": 0.78,
"learning_rate": 4.800229425175294e-05,
"loss": 0.0579,
"step": 90
},
{
"epoch": 0.86,
"learning_rate": 4.741924250958289e-05,
"loss": 0.0531,
"step": 100
},
{
"epoch": 0.95,
"learning_rate": 4.676615257088776e-05,
"loss": 0.0491,
"step": 110
},
{
"epoch": 1.0,
"eval_loss": 0.04749821871519089,
"eval_runtime": 8.519,
"eval_samples_per_second": 435.733,
"eval_steps_per_second": 13.617,
"step": 116
},
{
"epoch": 1.03,
"learning_rate": 4.604506470270403e-05,
"loss": 0.0466,
"step": 120
},
{
"epoch": 1.12,
"learning_rate": 4.525823159906459e-05,
"loss": 0.0418,
"step": 130
},
{
"epoch": 1.21,
"learning_rate": 4.440811134353412e-05,
"loss": 0.0407,
"step": 140
},
{
"epoch": 1.29,
"learning_rate": 4.349735973010305e-05,
"loss": 0.0404,
"step": 150
},
{
"epoch": 1.38,
"learning_rate": 4.252882196642992e-05,
"loss": 0.0402,
"step": 160
},
{
"epoch": 1.47,
"learning_rate": 4.150552378535137e-05,
"loss": 0.0381,
"step": 170
},
{
"epoch": 1.55,
"learning_rate": 4.043066199242762e-05,
"loss": 0.038,
"step": 180
},
{
"epoch": 1.64,
"learning_rate": 3.930759447905298e-05,
"loss": 0.0357,
"step": 190
},
{
"epoch": 1.72,
"learning_rate": 3.813982973233083e-05,
"loss": 0.0345,
"step": 200
},
{
"epoch": 1.81,
"learning_rate": 3.693101587448436e-05,
"loss": 0.0329,
"step": 210
},
{
"epoch": 1.9,
"learning_rate": 3.568492926604412e-05,
"loss": 0.0323,
"step": 220
},
{
"epoch": 1.98,
"learning_rate": 3.440546270841639e-05,
"loss": 0.0344,
"step": 230
},
{
"epoch": 2.0,
"eval_loss": 0.03341960161924362,
"eval_runtime": 8.519,
"eval_samples_per_second": 435.734,
"eval_steps_per_second": 13.617,
"step": 232
},
{
"epoch": 2.07,
"learning_rate": 3.309661328268776e-05,
"loss": 0.0339,
"step": 240
},
{
"epoch": 2.16,
"learning_rate": 3.176246986265767e-05,
"loss": 0.0323,
"step": 250
},
{
"epoch": 2.24,
"learning_rate": 3.0407200341108617e-05,
"loss": 0.0332,
"step": 260
},
{
"epoch": 2.33,
"learning_rate": 2.9035038609219306e-05,
"loss": 0.0325,
"step": 270
},
{
"epoch": 2.41,
"learning_rate": 2.7650271329797427e-05,
"loss": 0.0284,
"step": 280
},
{
"epoch": 2.5,
"learning_rate": 2.6257224545652688e-05,
"loss": 0.0304,
"step": 290
},
{
"epoch": 2.59,
"learning_rate": 2.4860250164945876e-05,
"loss": 0.0287,
"step": 300
},
{
"epoch": 2.67,
"learning_rate": 2.346371236573409e-05,
"loss": 0.0303,
"step": 310
},
{
"epoch": 2.76,
"learning_rate": 2.2071973962184384e-05,
"loss": 0.0297,
"step": 320
},
{
"epoch": 2.84,
"learning_rate": 2.0689382775048418e-05,
"loss": 0.0291,
"step": 330
},
{
"epoch": 2.93,
"learning_rate": 1.9320258048976702e-05,
"loss": 0.0272,
"step": 340
},
{
"epoch": 3.0,
"eval_loss": 0.029604924842715263,
"eval_runtime": 8.5231,
"eval_samples_per_second": 435.525,
"eval_steps_per_second": 13.61,
"step": 348
},
{
"epoch": 3.02,
"learning_rate": 1.796887695910535e-05,
"loss": 0.0269,
"step": 350
},
{
"epoch": 3.1,
"learning_rate": 1.6639461249068726e-05,
"loss": 0.0299,
"step": 360
},
{
"epoch": 3.19,
"learning_rate": 1.5336164042181494e-05,
"loss": 0.0277,
"step": 370
},
{
"epoch": 3.28,
"learning_rate": 1.4063056866991826e-05,
"loss": 0.0262,
"step": 380
},
{
"epoch": 3.36,
"learning_rate": 1.2824116937738579e-05,
"loss": 0.0272,
"step": 390
},
{
"epoch": 3.45,
"learning_rate": 1.1623214729448317e-05,
"loss": 0.0267,
"step": 400
},
{
"epoch": 3.53,
"learning_rate": 1.0464101886487958e-05,
"loss": 0.0287,
"step": 410
},
{
"epoch": 3.62,
"learning_rate": 9.35039950234696e-06,
"loss": 0.0259,
"step": 420
},
{
"epoch": 3.71,
"learning_rate": 8.285586807263254e-06,
"loss": 0.0284,
"step": 430
},
{
"epoch": 3.79,
"learning_rate": 7.272990299033045e-06,
"loss": 0.0268,
"step": 440
},
{
"epoch": 3.88,
"learning_rate": 6.4089163408243555e-06,
"loss": 0.0258,
"step": 450
},
{
"epoch": 3.97,
"learning_rate": 5.504102886405624e-06,
"loss": 0.0265,
"step": 460
},
{
"epoch": 4.0,
"eval_loss": 0.02763618901371956,
"eval_runtime": 8.9353,
"eval_samples_per_second": 415.429,
"eval_steps_per_second": 12.982,
"step": 464
},
{
"epoch": 4.05,
"learning_rate": 4.660195034524128e-06,
"loss": 0.0262,
"step": 470
},
{
"epoch": 4.14,
"learning_rate": 3.879829171487476e-06,
"loss": 0.0258,
"step": 480
},
{
"epoch": 4.22,
"learning_rate": 3.1654431770613837e-06,
"loss": 0.0275,
"step": 490
},
{
"epoch": 4.31,
"learning_rate": 2.519268808480779e-06,
"loss": 0.0266,
"step": 500
},
{
"epoch": 4.4,
"learning_rate": 1.997705748247067e-06,
"loss": 0.0254,
"step": 510
},
{
"epoch": 4.48,
"learning_rate": 1.486513668803946e-06,
"loss": 0.0282,
"step": 520
},
{
"epoch": 4.57,
"learning_rate": 1.0487782271553504e-06,
"loss": 0.0244,
"step": 530
},
{
"epoch": 4.66,
"learning_rate": 6.858669182500971e-07,
"loss": 0.0247,
"step": 540
},
{
"epoch": 4.74,
"learning_rate": 3.98913484834551e-07,
"loss": 0.0277,
"step": 550
},
{
"epoch": 4.83,
"learning_rate": 1.8881437561586722e-07,
"loss": 0.0267,
"step": 560
},
{
"epoch": 4.91,
"learning_rate": 5.622594473692067e-08,
"loss": 0.024,
"step": 570
},
{
"epoch": 5.0,
"learning_rate": 1.5624013118137326e-09,
"loss": 0.0258,
"step": 580
},
{
"epoch": 5.0,
"eval_loss": 0.027217445895075798,
"eval_runtime": 8.5071,
"eval_samples_per_second": 436.342,
"eval_steps_per_second": 13.636,
"step": 580
},
{
"epoch": 5.0,
"step": 580,
"total_flos": 5.106947651262218e+17,
"train_loss": 0.112489017330367,
"train_runtime": 655.275,
"train_samples_per_second": 113.288,
"train_steps_per_second": 0.885
}
],
"max_steps": 580,
"num_train_epochs": 5,
"total_flos": 5.106947651262218e+17,
"trial_name": null,
"trial_params": null
}