generated from xuyuqing/ailab
956 lines
20 KiB
JSON
956 lines
20 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 14.87603305785124,
|
|
"global_step": 1350,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.2195121951219513e-05,
|
|
"loss": 3.2681,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 2.4390243902439026e-05,
|
|
"loss": 3.0116,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 3.6585365853658535e-05,
|
|
"loss": 2.438,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.878048780487805e-05,
|
|
"loss": 1.8845,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.9994168254017926e-05,
|
|
"loss": 1.6524,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.997401262510025e-05,
|
|
"loss": 1.5273,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 4.993947272264899e-05,
|
|
"loss": 1.4715,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.989056844059999e-05,
|
|
"loss": 1.3971,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.9827327946335875e-05,
|
|
"loss": 1.402,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"eval_loss": 1.363455057144165,
|
|
"eval_runtime": 21.7188,
|
|
"eval_samples_per_second": 133.709,
|
|
"eval_steps_per_second": 4.19,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.974978766446258e-05,
|
|
"loss": 1.3469,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 4.965799225582979e-05,
|
|
"loss": 1.3422,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 4.955199459180771e-05,
|
|
"loss": 1.3365,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.943185572383474e-05,
|
|
"loss": 1.3001,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 4.929764484825369e-05,
|
|
"loss": 1.3332,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 4.914943926645679e-05,
|
|
"loss": 1.3073,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 4.902085570615122e-05,
|
|
"loss": 1.3238,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 4.884768015060423e-05,
|
|
"loss": 1.315,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 4.866076905485482e-05,
|
|
"loss": 1.3201,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"eval_loss": 1.2681220769882202,
|
|
"eval_runtime": 21.6987,
|
|
"eval_samples_per_second": 133.833,
|
|
"eval_steps_per_second": 4.194,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 4.846023007402305e-05,
|
|
"loss": 1.2852,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 4.824617871247732e-05,
|
|
"loss": 1.2377,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 4.801873825730733e-05,
|
|
"loss": 1.2607,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 4.777803970731447e-05,
|
|
"loss": 1.2585,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 4.752422169756048e-05,
|
|
"loss": 1.2589,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 4.725743041951787e-05,
|
|
"loss": 1.2031,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 4.697781953686805e-05,
|
|
"loss": 1.1827,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 4.6685550096995744e-05,
|
|
"loss": 1.2372,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 4.6380790438230535e-05,
|
|
"loss": 1.2416,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_loss": 1.2141121625900269,
|
|
"eval_runtime": 21.7178,
|
|
"eval_samples_per_second": 133.715,
|
|
"eval_steps_per_second": 4.19,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 4.6063716092889144e-05,
|
|
"loss": 1.272,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 4.573450968617411e-05,
|
|
"loss": 1.2044,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 4.539336083098721e-05,
|
|
"loss": 1.2124,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 4.504046601871822e-05,
|
|
"loss": 1.1781,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 4.46760285060718e-05,
|
|
"loss": 1.1853,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 4.433833908687633e-05,
|
|
"loss": 1.1976,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 4.395255412906024e-05,
|
|
"loss": 1.1981,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 4.355585307510675e-05,
|
|
"loss": 1.1953,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 4.314846441278658e-05,
|
|
"loss": 1.1495,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_loss": 1.173239827156067,
|
|
"eval_runtime": 21.7176,
|
|
"eval_samples_per_second": 133.716,
|
|
"eval_steps_per_second": 4.19,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.273062278560863e-05,
|
|
"loss": 1.1963,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 4.2302568857672375e-05,
|
|
"loss": 1.1725,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 4.18645491750527e-05,
|
|
"loss": 1.1278,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 4.1462019138588816e-05,
|
|
"loss": 1.1755,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"learning_rate": 4.1005764197174445e-05,
|
|
"loss": 1.1234,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 4.054029042121465e-05,
|
|
"loss": 1.135,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 4.0065865909481417e-05,
|
|
"loss": 1.1457,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 3.9582763916098925e-05,
|
|
"loss": 1.1632,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 3.909126269315754e-05,
|
|
"loss": 1.1386,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 4.99,
|
|
"eval_loss": 1.1348333358764648,
|
|
"eval_runtime": 21.6995,
|
|
"eval_samples_per_second": 133.828,
|
|
"eval_steps_per_second": 4.194,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 5.07,
|
|
"learning_rate": 3.859164533044901e-05,
|
|
"loss": 1.1216,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 5.18,
|
|
"learning_rate": 3.8084199592415305e-05,
|
|
"loss": 1.0945,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 5.29,
|
|
"learning_rate": 3.7569217752405095e-05,
|
|
"loss": 1.0991,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 5.4,
|
|
"learning_rate": 3.704699642433311e-05,
|
|
"loss": 1.0935,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 5.51,
|
|
"learning_rate": 3.6517836391839586e-05,
|
|
"loss": 1.113,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 5.62,
|
|
"learning_rate": 3.5982042435047936e-05,
|
|
"loss": 1.1117,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 5.73,
|
|
"learning_rate": 3.5494410851346965e-05,
|
|
"loss": 1.062,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 5.84,
|
|
"learning_rate": 3.494686564912302e-05,
|
|
"loss": 1.1157,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 5.95,
|
|
"learning_rate": 3.439359135408382e-05,
|
|
"loss": 1.0741,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 5.99,
|
|
"eval_loss": 1.1024105548858643,
|
|
"eval_runtime": 21.7178,
|
|
"eval_samples_per_second": 133.715,
|
|
"eval_steps_per_second": 4.19,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 6.06,
|
|
"learning_rate": 3.383490663543635e-05,
|
|
"loss": 1.0864,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 6.17,
|
|
"learning_rate": 3.327113327862747e-05,
|
|
"loss": 1.1023,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 6.28,
|
|
"learning_rate": 3.2702596000005194e-05,
|
|
"loss": 1.0629,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 6.39,
|
|
"learning_rate": 3.2187109888308544e-05,
|
|
"loss": 1.0803,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"learning_rate": 3.161042542700322e-05,
|
|
"loss": 1.0683,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 6.61,
|
|
"learning_rate": 3.1029933561219375e-05,
|
|
"loss": 1.0775,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 6.72,
|
|
"learning_rate": 3.0445968636658968e-05,
|
|
"loss": 1.053,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 6.83,
|
|
"learning_rate": 2.9858866999400402e-05,
|
|
"loss": 1.0666,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 6.94,
|
|
"learning_rate": 2.9268966802173436e-05,
|
|
"loss": 1.0307,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"eval_loss": 1.06972336769104,
|
|
"eval_runtime": 21.7123,
|
|
"eval_samples_per_second": 133.749,
|
|
"eval_steps_per_second": 4.191,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 7.05,
|
|
"learning_rate": 2.8676607809593486e-05,
|
|
"loss": 1.0641,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 7.16,
|
|
"learning_rate": 2.808213120246745e-05,
|
|
"loss": 1.05,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 7.27,
|
|
"learning_rate": 2.748587938128398e-05,
|
|
"loss": 1.0886,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 7.38,
|
|
"learning_rate": 2.6888195769001146e-05,
|
|
"loss": 1.0564,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 7.49,
|
|
"learning_rate": 2.6289424613245262e-05,
|
|
"loss": 1.0669,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 7.6,
|
|
"learning_rate": 2.5689910788034684e-05,
|
|
"loss": 1.0688,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 7.71,
|
|
"learning_rate": 2.5149998749243508e-05,
|
|
"loss": 1.0059,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 7.82,
|
|
"learning_rate": 2.455002535172917e-05,
|
|
"loss": 0.9851,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 7.93,
|
|
"learning_rate": 2.395031112595951e-05,
|
|
"loss": 1.0784,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_loss": 1.0449837446212769,
|
|
"eval_runtime": 21.7124,
|
|
"eval_samples_per_second": 133.748,
|
|
"eval_steps_per_second": 4.191,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 8.04,
|
|
"learning_rate": 2.3411075408214046e-05,
|
|
"loss": 1.0064,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 8.15,
|
|
"learning_rate": 2.2812804949440014e-05,
|
|
"loss": 1.0385,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 8.26,
|
|
"learning_rate": 2.2215794248653252e-05,
|
|
"loss": 1.0267,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 8.37,
|
|
"learning_rate": 2.162038716590346e-05,
|
|
"loss": 1.0106,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 8.48,
|
|
"learning_rate": 2.1026926637605008e-05,
|
|
"loss": 1.0353,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 8.6,
|
|
"learning_rate": 2.049475900284411e-05,
|
|
"loss": 1.0158,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 8.71,
|
|
"learning_rate": 1.990593754374217e-05,
|
|
"loss": 1.0295,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 8.82,
|
|
"learning_rate": 1.9320050110055677e-05,
|
|
"loss": 0.999,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 8.93,
|
|
"learning_rate": 1.873743415516999e-05,
|
|
"loss": 0.9789,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 8.99,
|
|
"eval_loss": 1.0177918672561646,
|
|
"eval_runtime": 21.6973,
|
|
"eval_samples_per_second": 133.841,
|
|
"eval_steps_per_second": 4.194,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 9.04,
|
|
"learning_rate": 1.815842524819793e-05,
|
|
"loss": 1.006,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 9.15,
|
|
"learning_rate": 1.7583356880702153e-05,
|
|
"loss": 0.9937,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 9.26,
|
|
"learning_rate": 1.701256027461414e-05,
|
|
"loss": 0.9572,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 9.37,
|
|
"learning_rate": 1.644636419146042e-05,
|
|
"loss": 0.9858,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 9.48,
|
|
"learning_rate": 1.5885094743005878e-05,
|
|
"loss": 0.9935,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 9.59,
|
|
"learning_rate": 1.532907520342331e-05,
|
|
"loss": 0.9844,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 9.7,
|
|
"learning_rate": 1.4778625823097295e-05,
|
|
"loss": 1.0214,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 9.81,
|
|
"learning_rate": 1.4288245971537468e-05,
|
|
"loss": 0.9833,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 9.92,
|
|
"learning_rate": 1.3749250546104581e-05,
|
|
"loss": 0.9501,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 9.99,
|
|
"eval_loss": 1.0011532306671143,
|
|
"eval_runtime": 21.7022,
|
|
"eval_samples_per_second": 133.812,
|
|
"eval_steps_per_second": 4.193,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 10.03,
|
|
"learning_rate": 1.3216735210997261e-05,
|
|
"loss": 0.9594,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 10.14,
|
|
"learning_rate": 1.2691006678891446e-05,
|
|
"loss": 0.9868,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 10.25,
|
|
"learning_rate": 1.2223904067394049e-05,
|
|
"loss": 0.9457,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 10.36,
|
|
"learning_rate": 1.1711901313775969e-05,
|
|
"loss": 0.9618,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 10.47,
|
|
"learning_rate": 1.1207552101825911e-05,
|
|
"loss": 0.9453,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 10.58,
|
|
"learning_rate": 1.071114692138746e-05,
|
|
"loss": 0.9559,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 10.69,
|
|
"learning_rate": 1.022297168678309e-05,
|
|
"loss": 0.9382,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 10.8,
|
|
"learning_rate": 9.74330757213615e-06,
|
|
"loss": 0.9346,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 10.91,
|
|
"learning_rate": 9.272430849423174e-06,
|
|
"loss": 0.9238,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"eval_loss": 0.9903799891471863,
|
|
"eval_runtime": 21.7181,
|
|
"eval_samples_per_second": 133.713,
|
|
"eval_steps_per_second": 4.19,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 11.02,
|
|
"learning_rate": 8.810612729349513e-06,
|
|
"loss": 0.9526,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 11.13,
|
|
"learning_rate": 8.358119205140283e-06,
|
|
"loss": 0.9438,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 11.24,
|
|
"learning_rate": 7.915210899336284e-06,
|
|
"loss": 0.9403,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 11.35,
|
|
"learning_rate": 7.482142913683343e-06,
|
|
"loss": 0.9189,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 11.46,
|
|
"learning_rate": 7.0591646822014605e-06,
|
|
"loss": 0.9529,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 11.57,
|
|
"learning_rate": 6.64651982751833e-06,
|
|
"loss": 0.9356,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 11.68,
|
|
"learning_rate": 6.284171055672483e-06,
|
|
"loss": 0.9623,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 11.79,
|
|
"learning_rate": 5.891809370823214e-06,
|
|
"loss": 0.9565,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 11.9,
|
|
"learning_rate": 5.510453424013248e-06,
|
|
"loss": 0.9128,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_loss": 0.9820969700813293,
|
|
"eval_runtime": 21.6958,
|
|
"eval_samples_per_second": 133.851,
|
|
"eval_steps_per_second": 4.194,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 12.01,
|
|
"learning_rate": 5.140322864697183e-06,
|
|
"loss": 0.9585,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 12.12,
|
|
"learning_rate": 4.7816308768470455e-06,
|
|
"loss": 0.9521,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 12.23,
|
|
"learning_rate": 4.4345840561647204e-06,
|
|
"loss": 0.9381,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 12.34,
|
|
"learning_rate": 4.099382291089151e-06,
|
|
"loss": 0.9924,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 12.45,
|
|
"learning_rate": 3.7762186476666407e-06,
|
|
"loss": 0.9308,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 12.56,
|
|
"learning_rate": 3.4652792583506594e-06,
|
|
"loss": 0.9126,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 12.67,
|
|
"learning_rate": 3.166743214795223e-06,
|
|
"loss": 0.9442,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 12.78,
|
|
"learning_rate": 2.8807824647035103e-06,
|
|
"loss": 0.9031,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 12.89,
|
|
"learning_rate": 2.607561712791226e-06,
|
|
"loss": 0.9032,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 12.99,
|
|
"eval_loss": 0.9770178198814392,
|
|
"eval_runtime": 22.8975,
|
|
"eval_samples_per_second": 126.826,
|
|
"eval_steps_per_second": 3.974,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"learning_rate": 2.3982639093708626e-06,
|
|
"loss": 0.9605,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 13.11,
|
|
"learning_rate": 2.148366729667564e-06,
|
|
"loss": 0.9569,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 13.22,
|
|
"learning_rate": 1.9116313973847803e-06,
|
|
"loss": 0.9131,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 13.33,
|
|
"learning_rate": 1.7099358310249752e-06,
|
|
"loss": 0.9504,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 13.44,
|
|
"learning_rate": 1.4985773307076872e-06,
|
|
"loss": 0.9206,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 13.55,
|
|
"learning_rate": 1.300754936789411e-06,
|
|
"loss": 0.9365,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 13.66,
|
|
"learning_rate": 1.116582588966894e-06,
|
|
"loss": 0.9106,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 13.77,
|
|
"learning_rate": 9.461663649244973e-07,
|
|
"loss": 0.9158,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 13.88,
|
|
"learning_rate": 7.896044192366586e-07,
|
|
"loss": 0.9168,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 13.99,
|
|
"learning_rate": 6.469869268338313e-07,
|
|
"loss": 0.9241,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 13.99,
|
|
"eval_loss": 0.9746606349945068,
|
|
"eval_runtime": 21.6829,
|
|
"eval_samples_per_second": 133.93,
|
|
"eval_steps_per_second": 4.197,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 14.1,
|
|
"learning_rate": 5.183960310644748e-07,
|
|
"loss": 0.8787,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 14.21,
|
|
"learning_rate": 4.0390579638296124e-07,
|
|
"loss": 0.9314,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 14.33,
|
|
"learning_rate": 3.035821656907106e-07,
|
|
"loss": 0.9205,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 14.44,
|
|
"learning_rate": 2.174829223550806e-07,
|
|
"loss": 0.9435,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 14.55,
|
|
"learning_rate": 1.4565765692788337e-07,
|
|
"loss": 0.9005,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 14.66,
|
|
"learning_rate": 8.814773858275004e-08,
|
|
"loss": 0.8925,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 14.77,
|
|
"learning_rate": 4.498629128773524e-08,
|
|
"loss": 0.9124,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 14.88,
|
|
"learning_rate": 1.619817472691687e-08,
|
|
"loss": 0.9458,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 14.88,
|
|
"eval_loss": 0.9742981195449829,
|
|
"eval_runtime": 21.7077,
|
|
"eval_samples_per_second": 133.777,
|
|
"eval_steps_per_second": 4.192,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 14.88,
|
|
"step": 1350,
|
|
"total_flos": 2.980342652110635e+18,
|
|
"train_loss": 1.1214810215985334,
|
|
"train_runtime": 3241.6485,
|
|
"train_samples_per_second": 53.75,
|
|
"train_steps_per_second": 0.416
|
|
}
|
|
],
|
|
"max_steps": 1350,
|
|
"num_train_epochs": 15,
|
|
"total_flos": 2.980342652110635e+18,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|