finetuned_models/llama2_wenan_qlora_50e/trainer_state.json

1146 lines
25 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.48453608247423,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.41,
"learning_rate": 1.388888888888889e-05,
"loss": 2.3154,
"step": 10
},
{
"epoch": 0.82,
"learning_rate": 2.777777777777778e-05,
"loss": 2.2935,
"step": 20
},
{
"epoch": 0.99,
"eval_loss": 2.2255096435546875,
"eval_runtime": 3.3098,
"eval_samples_per_second": 232.639,
"eval_steps_per_second": 7.553,
"step": 24
},
{
"epoch": 1.24,
"learning_rate": 4.166666666666667e-05,
"loss": 2.2031,
"step": 30
},
{
"epoch": 1.65,
"learning_rate": 4.999854313415309e-05,
"loss": 2.1121,
"step": 40
},
{
"epoch": 1.98,
"eval_loss": 2.0086605548858643,
"eval_runtime": 3.3338,
"eval_samples_per_second": 230.965,
"eval_steps_per_second": 7.499,
"step": 48
},
{
"epoch": 2.06,
"learning_rate": 4.9982155343321184e-05,
"loss": 2.0381,
"step": 50
},
{
"epoch": 2.47,
"learning_rate": 4.9947570655942796e-05,
"loss": 1.9515,
"step": 60
},
{
"epoch": 2.89,
"learning_rate": 4.989481426335828e-05,
"loss": 1.8964,
"step": 70
},
{
"epoch": 2.97,
"eval_loss": 1.8894507884979248,
"eval_runtime": 3.3135,
"eval_samples_per_second": 232.382,
"eval_steps_per_second": 7.545,
"step": 72
},
{
"epoch": 3.3,
"learning_rate": 4.982392459310141e-05,
"loss": 1.8723,
"step": 80
},
{
"epoch": 3.71,
"learning_rate": 4.9734953280908904e-05,
"loss": 1.8184,
"step": 90
},
{
"epoch": 4.0,
"eval_loss": 1.807281732559204,
"eval_runtime": 3.3174,
"eval_samples_per_second": 232.107,
"eval_steps_per_second": 7.536,
"step": 97
},
{
"epoch": 4.12,
"learning_rate": 4.9627965133109165e-05,
"loss": 1.8065,
"step": 100
},
{
"epoch": 4.54,
"learning_rate": 4.950303807941764e-05,
"loss": 1.7479,
"step": 110
},
{
"epoch": 4.95,
"learning_rate": 4.936026311617316e-05,
"loss": 1.7511,
"step": 120
},
{
"epoch": 4.99,
"eval_loss": 1.7449322938919067,
"eval_runtime": 3.3418,
"eval_samples_per_second": 230.412,
"eval_steps_per_second": 7.481,
"step": 121
},
{
"epoch": 5.36,
"learning_rate": 4.919974424005652e-05,
"loss": 1.7217,
"step": 130
},
{
"epoch": 5.77,
"learning_rate": 4.902159837233985e-05,
"loss": 1.7203,
"step": 140
},
{
"epoch": 5.98,
"eval_loss": 1.7159528732299805,
"eval_runtime": 3.3301,
"eval_samples_per_second": 231.221,
"eval_steps_per_second": 7.507,
"step": 145
},
{
"epoch": 6.19,
"learning_rate": 4.882595527372152e-05,
"loss": 1.7072,
"step": 150
},
{
"epoch": 6.6,
"learning_rate": 4.8612957449809135e-05,
"loss": 1.6995,
"step": 160
},
{
"epoch": 6.97,
"eval_loss": 1.6927465200424194,
"eval_runtime": 3.3065,
"eval_samples_per_second": 232.872,
"eval_steps_per_second": 7.561,
"step": 169
},
{
"epoch": 7.01,
"learning_rate": 4.838276004731892e-05,
"loss": 1.684,
"step": 170
},
{
"epoch": 7.42,
"learning_rate": 4.813553074106761e-05,
"loss": 1.671,
"step": 180
},
{
"epoch": 7.84,
"learning_rate": 4.787144961183874e-05,
"loss": 1.6634,
"step": 190
},
{
"epoch": 8.0,
"eval_loss": 1.6678295135498047,
"eval_runtime": 3.3139,
"eval_samples_per_second": 232.354,
"eval_steps_per_second": 7.544,
"step": 194
},
{
"epoch": 8.25,
"learning_rate": 4.759070901521263e-05,
"loss": 1.6528,
"step": 200
},
{
"epoch": 8.66,
"learning_rate": 4.7293513441455364e-05,
"loss": 1.6334,
"step": 210
},
{
"epoch": 8.99,
"eval_loss": 1.6490652561187744,
"eval_runtime": 3.3203,
"eval_samples_per_second": 231.91,
"eval_steps_per_second": 7.53,
"step": 218
},
{
"epoch": 9.07,
"learning_rate": 4.698007936656891e-05,
"loss": 1.636,
"step": 220
},
{
"epoch": 9.48,
"learning_rate": 4.665063509461097e-05,
"loss": 1.6254,
"step": 230
},
{
"epoch": 9.9,
"learning_rate": 4.630542059139924e-05,
"loss": 1.6115,
"step": 240
},
{
"epoch": 9.98,
"eval_loss": 1.6315476894378662,
"eval_runtime": 3.326,
"eval_samples_per_second": 231.509,
"eval_steps_per_second": 7.517,
"step": 242
},
{
"epoch": 10.31,
"learning_rate": 4.59446873097213e-05,
"loss": 1.5838,
"step": 250
},
{
"epoch": 10.72,
"learning_rate": 4.556869800617754e-05,
"loss": 1.6077,
"step": 260
},
{
"epoch": 10.97,
"eval_loss": 1.6146764755249023,
"eval_runtime": 3.3121,
"eval_samples_per_second": 232.48,
"eval_steps_per_second": 7.548,
"step": 266
},
{
"epoch": 11.13,
"learning_rate": 4.517772654979023e-05,
"loss": 1.6012,
"step": 270
},
{
"epoch": 11.55,
"learning_rate": 4.4772057722518643e-05,
"loss": 1.5915,
"step": 280
},
{
"epoch": 11.96,
"learning_rate": 4.435198701182492e-05,
"loss": 1.5884,
"step": 290
},
{
"epoch": 12.0,
"eval_loss": 1.5998302698135376,
"eval_runtime": 3.3303,
"eval_samples_per_second": 231.208,
"eval_steps_per_second": 7.507,
"step": 291
},
{
"epoch": 12.37,
"learning_rate": 4.391782039544238e-05,
"loss": 1.5503,
"step": 300
},
{
"epoch": 12.78,
"learning_rate": 4.346987411850253e-05,
"loss": 1.5488,
"step": 310
},
{
"epoch": 12.99,
"eval_loss": 1.582898736000061,
"eval_runtime": 3.3255,
"eval_samples_per_second": 231.544,
"eval_steps_per_second": 7.518,
"step": 315
},
{
"epoch": 13.2,
"learning_rate": 4.30084744631835e-05,
"loss": 1.5662,
"step": 320
},
{
"epoch": 13.61,
"learning_rate": 4.253395751104748e-05,
"loss": 1.5438,
"step": 330
},
{
"epoch": 13.98,
"eval_loss": 1.5704370737075806,
"eval_runtime": 3.3233,
"eval_samples_per_second": 231.694,
"eval_steps_per_second": 7.523,
"step": 339
},
{
"epoch": 14.02,
"learning_rate": 4.20466688982403e-05,
"loss": 1.5412,
"step": 340
},
{
"epoch": 14.43,
"learning_rate": 4.154696356373154e-05,
"loss": 1.535,
"step": 350
},
{
"epoch": 14.85,
"learning_rate": 4.10352054907785e-05,
"loss": 1.5429,
"step": 360
},
{
"epoch": 14.97,
"eval_loss": 1.5577681064605713,
"eval_runtime": 3.3105,
"eval_samples_per_second": 232.594,
"eval_steps_per_second": 7.552,
"step": 363
},
{
"epoch": 15.26,
"learning_rate": 4.051176744180227e-05,
"loss": 1.515,
"step": 370
},
{
"epoch": 15.67,
"learning_rate": 3.997703068686923e-05,
"loss": 1.5198,
"step": 380
},
{
"epoch": 16.0,
"eval_loss": 1.54729163646698,
"eval_runtime": 3.3137,
"eval_samples_per_second": 232.37,
"eval_steps_per_second": 7.544,
"step": 388
},
{
"epoch": 16.08,
"learning_rate": 3.943138472597549e-05,
"loss": 1.5096,
"step": 390
},
{
"epoch": 16.49,
"learning_rate": 3.887522700533675e-05,
"loss": 1.5052,
"step": 400
},
{
"epoch": 16.91,
"learning_rate": 3.8308962627890174e-05,
"loss": 1.4901,
"step": 410
},
{
"epoch": 16.99,
"eval_loss": 1.5360088348388672,
"eval_runtime": 3.3236,
"eval_samples_per_second": 231.68,
"eval_steps_per_second": 7.522,
"step": 412
},
{
"epoch": 17.32,
"learning_rate": 3.773300405821908e-05,
"loss": 1.5038,
"step": 420
},
{
"epoch": 17.73,
"learning_rate": 3.714777082211551e-05,
"loss": 1.5092,
"step": 430
},
{
"epoch": 17.98,
"eval_loss": 1.5263161659240723,
"eval_runtime": 3.332,
"eval_samples_per_second": 231.089,
"eval_steps_per_second": 7.503,
"step": 436
},
{
"epoch": 18.14,
"learning_rate": 3.6553689200999426e-05,
"loss": 1.481,
"step": 440
},
{
"epoch": 18.56,
"learning_rate": 3.595119192141706e-05,
"loss": 1.483,
"step": 450
},
{
"epoch": 18.97,
"learning_rate": 3.534071783984479e-05,
"loss": 1.4879,
"step": 460
},
{
"epoch": 18.97,
"eval_loss": 1.518540382385254,
"eval_runtime": 3.3283,
"eval_samples_per_second": 231.351,
"eval_steps_per_second": 7.511,
"step": 460
},
{
"epoch": 19.38,
"learning_rate": 3.472271162302789e-05,
"loss": 1.4647,
"step": 470
},
{
"epoch": 19.79,
"learning_rate": 3.409762342408719e-05,
"loss": 1.4799,
"step": 480
},
{
"epoch": 20.0,
"eval_loss": 1.5089068412780762,
"eval_runtime": 3.6806,
"eval_samples_per_second": 209.206,
"eval_steps_per_second": 6.792,
"step": 485
},
{
"epoch": 20.21,
"learning_rate": 3.346590855462939e-05,
"loss": 1.4678,
"step": 490
},
{
"epoch": 20.62,
"learning_rate": 3.2828027153100065e-05,
"loss": 1.4658,
"step": 500
},
{
"epoch": 20.99,
"eval_loss": 1.498407006263733,
"eval_runtime": 3.3262,
"eval_samples_per_second": 231.494,
"eval_steps_per_second": 7.516,
"step": 509
},
{
"epoch": 21.03,
"learning_rate": 3.218444384962071e-05,
"loss": 1.4547,
"step": 510
},
{
"epoch": 21.44,
"learning_rate": 3.153562742755414e-05,
"loss": 1.4633,
"step": 520
},
{
"epoch": 21.86,
"learning_rate": 3.088205048204469e-05,
"loss": 1.4442,
"step": 530
},
{
"epoch": 21.98,
"eval_loss": 1.489104986190796,
"eval_runtime": 3.323,
"eval_samples_per_second": 231.719,
"eval_steps_per_second": 7.523,
"step": 533
},
{
"epoch": 22.27,
"learning_rate": 3.0224189075781884e-05,
"loss": 1.4345,
"step": 540
},
{
"epoch": 22.68,
"learning_rate": 2.9562522392238346e-05,
"loss": 1.4434,
"step": 550
},
{
"epoch": 22.97,
"eval_loss": 1.482366681098938,
"eval_runtime": 3.3203,
"eval_samples_per_second": 231.906,
"eval_steps_per_second": 7.529,
"step": 557
},
{
"epoch": 23.09,
"learning_rate": 2.8897532386634663e-05,
"loss": 1.4529,
"step": 560
},
{
"epoch": 23.51,
"learning_rate": 2.8229703434885163e-05,
"loss": 1.4334,
"step": 570
},
{
"epoch": 23.92,
"learning_rate": 2.7559521980780568e-05,
"loss": 1.4423,
"step": 580
},
{
"epoch": 24.0,
"eval_loss": 1.474360466003418,
"eval_runtime": 3.3291,
"eval_samples_per_second": 231.294,
"eval_steps_per_second": 7.51,
"step": 582
},
{
"epoch": 24.33,
"learning_rate": 2.68874761816644e-05,
"loss": 1.4187,
"step": 590
},
{
"epoch": 24.74,
"learning_rate": 2.621405555286121e-05,
"loss": 1.4299,
"step": 600
},
{
"epoch": 24.99,
"eval_loss": 1.4700332880020142,
"eval_runtime": 3.3237,
"eval_samples_per_second": 231.671,
"eval_steps_per_second": 7.522,
"step": 606
},
{
"epoch": 25.15,
"learning_rate": 2.5539750611115697e-05,
"loss": 1.4282,
"step": 610
},
{
"epoch": 25.57,
"learning_rate": 2.4865052517302396e-05,
"loss": 1.4229,
"step": 620
},
{
"epoch": 25.98,
"learning_rate": 2.419045271866611e-05,
"loss": 1.4034,
"step": 630
},
{
"epoch": 25.98,
"eval_loss": 1.4617327451705933,
"eval_runtime": 3.3167,
"eval_samples_per_second": 232.159,
"eval_steps_per_second": 7.538,
"step": 630
},
{
"epoch": 26.39,
"learning_rate": 2.351644259085387e-05,
"loss": 1.4082,
"step": 640
},
{
"epoch": 26.8,
"learning_rate": 2.2843513079998983e-05,
"loss": 1.4161,
"step": 650
},
{
"epoch": 26.97,
"eval_loss": 1.4545296430587769,
"eval_runtime": 3.3224,
"eval_samples_per_second": 231.76,
"eval_steps_per_second": 7.525,
"step": 654
},
{
"epoch": 27.22,
"learning_rate": 2.2172154345117894e-05,
"loss": 1.4092,
"step": 660
},
{
"epoch": 27.63,
"learning_rate": 2.1502855401080482e-05,
"loss": 1.3839,
"step": 670
},
{
"epoch": 28.0,
"eval_loss": 1.4493850469589233,
"eval_runtime": 3.6643,
"eval_samples_per_second": 210.135,
"eval_steps_per_second": 6.823,
"step": 679
},
{
"epoch": 28.04,
"learning_rate": 2.0836103762413638e-05,
"loss": 1.3922,
"step": 680
},
{
"epoch": 28.45,
"learning_rate": 2.0172385088197803e-05,
"loss": 1.3896,
"step": 690
},
{
"epoch": 28.87,
"learning_rate": 1.9512182828314885e-05,
"loss": 1.4081,
"step": 700
},
{
"epoch": 28.99,
"eval_loss": 1.4452208280563354,
"eval_runtime": 3.3261,
"eval_samples_per_second": 231.505,
"eval_steps_per_second": 7.516,
"step": 703
},
{
"epoch": 29.28,
"learning_rate": 1.885597787130542e-05,
"loss": 1.3947,
"step": 710
},
{
"epoch": 29.69,
"learning_rate": 1.820424819409143e-05,
"loss": 1.3918,
"step": 720
},
{
"epoch": 29.98,
"eval_loss": 1.439475655555725,
"eval_runtime": 3.3169,
"eval_samples_per_second": 232.145,
"eval_steps_per_second": 7.537,
"step": 727
},
{
"epoch": 30.1,
"learning_rate": 1.7557468513819993e-05,
"loss": 1.3937,
"step": 730
},
{
"epoch": 30.52,
"learning_rate": 1.6916109942081293e-05,
"loss": 1.3749,
"step": 740
},
{
"epoch": 30.93,
"learning_rate": 1.6280639641752942e-05,
"loss": 1.3788,
"step": 750
},
{
"epoch": 30.97,
"eval_loss": 1.4356813430786133,
"eval_runtime": 3.3451,
"eval_samples_per_second": 230.187,
"eval_steps_per_second": 7.474,
"step": 751
},
{
"epoch": 31.34,
"learning_rate": 1.5651520486720516e-05,
"loss": 1.3907,
"step": 760
},
{
"epoch": 31.75,
"learning_rate": 1.5029210724722126e-05,
"loss": 1.3819,
"step": 770
},
{
"epoch": 32.0,
"eval_loss": 1.4313223361968994,
"eval_runtime": 3.3114,
"eval_samples_per_second": 232.53,
"eval_steps_per_second": 7.55,
"step": 776
},
{
"epoch": 32.16,
"learning_rate": 1.4414163643562755e-05,
"loss": 1.3903,
"step": 780
},
{
"epoch": 32.58,
"learning_rate": 1.3806827240941265e-05,
"loss": 1.3868,
"step": 790
},
{
"epoch": 32.99,
"learning_rate": 1.3207643898130853e-05,
"loss": 1.3813,
"step": 800
},
{
"epoch": 32.99,
"eval_loss": 1.428261160850525,
"eval_runtime": 3.3143,
"eval_samples_per_second": 232.327,
"eval_steps_per_second": 7.543,
"step": 800
},
{
"epoch": 33.4,
"learning_rate": 1.2617050057750322e-05,
"loss": 1.3607,
"step": 810
},
{
"epoch": 33.81,
"learning_rate": 1.2035475905861136e-05,
"loss": 1.3785,
"step": 820
},
{
"epoch": 33.98,
"eval_loss": 1.4251344203948975,
"eval_runtime": 3.3123,
"eval_samples_per_second": 232.467,
"eval_steps_per_second": 7.548,
"step": 824
},
{
"epoch": 34.23,
"learning_rate": 1.1463345058621755e-05,
"loss": 1.3555,
"step": 830
},
{
"epoch": 34.64,
"learning_rate": 1.0901074253727336e-05,
"loss": 1.3825,
"step": 840
},
{
"epoch": 34.97,
"eval_loss": 1.421394944190979,
"eval_runtime": 3.3263,
"eval_samples_per_second": 231.487,
"eval_steps_per_second": 7.516,
"step": 848
},
{
"epoch": 35.05,
"learning_rate": 1.0349073046859828e-05,
"loss": 1.3658,
"step": 850
},
{
"epoch": 35.46,
"learning_rate": 9.807743513369272e-06,
"loss": 1.3579,
"step": 860
},
{
"epoch": 35.88,
"learning_rate": 9.277479955403887e-06,
"loss": 1.3898,
"step": 870
},
{
"epoch": 36.0,
"eval_loss": 1.41862952709198,
"eval_runtime": 3.3199,
"eval_samples_per_second": 231.934,
"eval_steps_per_second": 7.53,
"step": 873
},
{
"epoch": 36.29,
"learning_rate": 8.758668614701973e-06,
"loss": 1.3586,
"step": 880
},
{
"epoch": 36.7,
"learning_rate": 8.251687391255117e-06,
"loss": 1.3616,
"step": 890
},
{
"epoch": 36.99,
"eval_loss": 1.4160585403442383,
"eval_runtime": 3.3202,
"eval_samples_per_second": 231.91,
"eval_steps_per_second": 7.53,
"step": 897
},
{
"epoch": 37.11,
"learning_rate": 7.756905568047393e-06,
"loss": 1.3792,
"step": 900
},
{
"epoch": 37.53,
"learning_rate": 7.274683542071242e-06,
"loss": 1.3662,
"step": 910
},
{
"epoch": 37.94,
"learning_rate": 6.805372561815767e-06,
"loss": 1.3564,
"step": 920
},
{
"epoch": 37.98,
"eval_loss": 1.4149819612503052,
"eval_runtime": 3.3197,
"eval_samples_per_second": 231.949,
"eval_steps_per_second": 7.531,
"step": 921
},
{
"epoch": 38.35,
"learning_rate": 6.349314471418849e-06,
"loss": 1.3607,
"step": 930
},
{
"epoch": 38.76,
"learning_rate": 5.906841461669327e-06,
"loss": 1.3575,
"step": 940
},
{
"epoch": 38.97,
"eval_loss": 1.4127955436706543,
"eval_runtime": 3.33,
"eval_samples_per_second": 231.231,
"eval_steps_per_second": 7.507,
"step": 945
},
{
"epoch": 39.18,
"learning_rate": 5.4782758280406e-06,
"loss": 1.3486,
"step": 950
},
{
"epoch": 39.59,
"learning_rate": 5.063929735931985e-06,
"loss": 1.366,
"step": 960
},
{
"epoch": 40.0,
"learning_rate": 4.66410499328874e-06,
"loss": 1.3461,
"step": 970
},
{
"epoch": 40.0,
"eval_loss": 1.411194920539856,
"eval_runtime": 3.3078,
"eval_samples_per_second": 232.784,
"eval_steps_per_second": 7.558,
"step": 970
},
{
"epoch": 40.41,
"learning_rate": 4.279092830766471e-06,
"loss": 1.3545,
"step": 980
},
{
"epoch": 40.82,
"learning_rate": 3.90917368959989e-06,
"loss": 1.3419,
"step": 990
},
{
"epoch": 40.99,
"eval_loss": 1.4097695350646973,
"eval_runtime": 3.3186,
"eval_samples_per_second": 232.025,
"eval_steps_per_second": 7.533,
"step": 994
},
{
"epoch": 41.24,
"learning_rate": 3.5546170173306444e-06,
"loss": 1.3435,
"step": 1000
},
{
"epoch": 41.65,
"learning_rate": 3.215681071542867e-06,
"loss": 1.3453,
"step": 1010
},
{
"epoch": 41.98,
"eval_loss": 1.4083501100540161,
"eval_runtime": 3.3153,
"eval_samples_per_second": 232.254,
"eval_steps_per_second": 7.541,
"step": 1018
},
{
"epoch": 42.06,
"learning_rate": 2.892612731749414e-06,
"loss": 1.3573,
"step": 1020
},
{
"epoch": 42.47,
"learning_rate": 2.5856473195658897e-06,
"loss": 1.3615,
"step": 1030
},
{
"epoch": 42.89,
"learning_rate": 2.2950084273033634e-06,
"loss": 1.344,
"step": 1040
},
{
"epoch": 42.97,
"eval_loss": 1.4079786539077759,
"eval_runtime": 3.3319,
"eval_samples_per_second": 231.096,
"eval_steps_per_second": 7.503,
"step": 1042
},
{
"epoch": 43.3,
"learning_rate": 2.020907755104698e-06,
"loss": 1.3352,
"step": 1050
},
{
"epoch": 43.71,
"learning_rate": 1.7635449567430185e-06,
"loss": 1.3595,
"step": 1060
},
{
"epoch": 44.0,
"eval_loss": 1.4071862697601318,
"eval_runtime": 3.3215,
"eval_samples_per_second": 231.821,
"eval_steps_per_second": 7.527,
"step": 1067
},
{
"epoch": 44.12,
"learning_rate": 1.5231074941947781e-06,
"loss": 1.3352,
"step": 1070
},
{
"epoch": 44.54,
"learning_rate": 1.2997705010932393e-06,
"loss": 1.3463,
"step": 1080
},
{
"epoch": 44.95,
"learning_rate": 1.0936966551618604e-06,
"loss": 1.3566,
"step": 1090
},
{
"epoch": 44.99,
"eval_loss": 1.4066252708435059,
"eval_runtime": 3.3208,
"eval_samples_per_second": 231.869,
"eval_steps_per_second": 7.528,
"step": 1091
},
{
"epoch": 45.36,
"learning_rate": 9.050360597205515e-07,
"loss": 1.3309,
"step": 1100
},
{
"epoch": 45.77,
"learning_rate": 7.339261343510206e-07,
"loss": 1.3394,
"step": 1110
},
{
"epoch": 45.98,
"eval_loss": 1.406414270401001,
"eval_runtime": 3.3119,
"eval_samples_per_second": 232.494,
"eval_steps_per_second": 7.549,
"step": 1115
},
{
"epoch": 46.19,
"learning_rate": 5.804915148009571e-07,
"loss": 1.3552,
"step": 1120
},
{
"epoch": 46.6,
"learning_rate": 4.4484396219986735e-07,
"loss": 1.3409,
"step": 1130
},
{
"epoch": 46.97,
"eval_loss": 1.4059563875198364,
"eval_runtime": 3.3174,
"eval_samples_per_second": 232.112,
"eval_steps_per_second": 7.536,
"step": 1139
},
{
"epoch": 47.01,
"learning_rate": 3.270822816527325e-07,
"loss": 1.3432,
"step": 1140
},
{
"epoch": 47.42,
"learning_rate": 2.272922502707997e-07,
"loss": 1.3395,
"step": 1150
},
{
"epoch": 47.84,
"learning_rate": 1.4554655469189439e-07,
"loss": 1.3263,
"step": 1160
},
{
"epoch": 48.0,
"eval_loss": 1.4058961868286133,
"eval_runtime": 3.3133,
"eval_samples_per_second": 232.395,
"eval_steps_per_second": 7.545,
"step": 1164
},
{
"epoch": 48.25,
"learning_rate": 8.190473813576572e-08,
"loss": 1.3607,
"step": 1170
},
{
"epoch": 48.66,
"learning_rate": 3.6413157033077236e-08,
"loss": 1.3466,
"step": 1180
},
{
"epoch": 48.99,
"eval_loss": 1.4058654308319092,
"eval_runtime": 3.3208,
"eval_samples_per_second": 231.87,
"eval_steps_per_second": 7.528,
"step": 1188
},
{
"epoch": 49.07,
"learning_rate": 9.104947259561126e-09,
"loss": 1.3313,
"step": 1190
},
{
"epoch": 49.48,
"learning_rate": 0.0,
"loss": 1.3469,
"step": 1200
},
{
"epoch": 49.48,
"eval_loss": 1.4058780670166016,
"eval_runtime": 3.3096,
"eval_samples_per_second": 232.657,
"eval_steps_per_second": 7.554,
"step": 1200
},
{
"epoch": 49.48,
"step": 1200,
"total_flos": 7.868691621724815e+17,
"train_loss": 1.4999438818295796,
"train_runtime": 2257.7963,
"train_samples_per_second": 68.164,
"train_steps_per_second": 0.531
}
],
"max_steps": 1200,
"num_train_epochs": 50,
"total_flos": 7.868691621724815e+17,
"trial_name": null,
"trial_params": null
}