generated from xuyuqing/ailab
13962 lines
277 KiB
JSON
13962 lines
277 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"global_step": 23160,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 7.194244604316547e-07,
|
|
"loss": 5.0794,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.4388489208633094e-06,
|
|
"loss": 5.0596,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 2.1582733812949645e-06,
|
|
"loss": 5.0769,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 2.877697841726619e-06,
|
|
"loss": 5.069,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 3.5971223021582732e-06,
|
|
"loss": 4.9247,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 4.316546762589929e-06,
|
|
"loss": 4.8425,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 5.035971223021583e-06,
|
|
"loss": 4.8177,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 5.755395683453238e-06,
|
|
"loss": 4.7713,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 6.474820143884892e-06,
|
|
"loss": 4.5874,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 7.1942446043165465e-06,
|
|
"loss": 4.4,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 7.913669064748202e-06,
|
|
"loss": 4.2331,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 8.633093525179858e-06,
|
|
"loss": 3.9907,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.352517985611512e-06,
|
|
"loss": 3.832,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.0071942446043167e-05,
|
|
"loss": 3.6927,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.0791366906474821e-05,
|
|
"loss": 3.6372,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.1510791366906475e-05,
|
|
"loss": 3.6589,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.223021582733813e-05,
|
|
"loss": 3.5743,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.2949640287769784e-05,
|
|
"loss": 3.5936,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.366906474820144e-05,
|
|
"loss": 3.5464,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.4388489208633093e-05,
|
|
"loss": 3.5314,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.5107913669064749e-05,
|
|
"loss": 3.5154,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.5827338129496403e-05,
|
|
"loss": 3.4843,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.6546762589928058e-05,
|
|
"loss": 3.5982,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.7266187050359716e-05,
|
|
"loss": 3.5369,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.7985611510791367e-05,
|
|
"loss": 3.468,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.8705035971223024e-05,
|
|
"loss": 3.4998,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.942446043165468e-05,
|
|
"loss": 3.5115,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.0143884892086333e-05,
|
|
"loss": 3.5545,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.0863309352517988e-05,
|
|
"loss": 3.4808,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.1582733812949642e-05,
|
|
"loss": 3.4455,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.2302158273381296e-05,
|
|
"loss": 3.4575,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.302158273381295e-05,
|
|
"loss": 3.4486,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.3741007194244605e-05,
|
|
"loss": 3.3872,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.446043165467626e-05,
|
|
"loss": 3.4609,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.5179856115107914e-05,
|
|
"loss": 3.4621,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.589928057553957e-05,
|
|
"loss": 3.4752,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.6618705035971226e-05,
|
|
"loss": 3.4315,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.733812949640288e-05,
|
|
"loss": 3.3985,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.805755395683453e-05,
|
|
"loss": 3.4633,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 2.8776978417266186e-05,
|
|
"loss": 3.4653,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 2.9496402877697844e-05,
|
|
"loss": 3.4259,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.0215827338129498e-05,
|
|
"loss": 3.4427,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.0935251798561156e-05,
|
|
"loss": 3.4442,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.165467625899281e-05,
|
|
"loss": 3.4401,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.237410071942446e-05,
|
|
"loss": 3.3566,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.3093525179856116e-05,
|
|
"loss": 3.3678,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.3812949640287773e-05,
|
|
"loss": 3.3995,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.453237410071943e-05,
|
|
"loss": 3.4305,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 3.5251798561151075e-05,
|
|
"loss": 3.3855,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 3.597122302158273e-05,
|
|
"loss": 3.421,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 3.669064748201439e-05,
|
|
"loss": 3.3819,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 3.741007194244605e-05,
|
|
"loss": 3.3863,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 3.81294964028777e-05,
|
|
"loss": 3.4173,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 3.884892086330936e-05,
|
|
"loss": 3.3894,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 3.956834532374101e-05,
|
|
"loss": 3.4352,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.0287769784172666e-05,
|
|
"loss": 3.4051,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.100719424460432e-05,
|
|
"loss": 3.4108,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.1726618705035975e-05,
|
|
"loss": 3.3722,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.244604316546763e-05,
|
|
"loss": 3.3759,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.3165467625899284e-05,
|
|
"loss": 3.421,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.3884892086330935e-05,
|
|
"loss": 3.4149,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.460431654676259e-05,
|
|
"loss": 3.3518,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.532374100719425e-05,
|
|
"loss": 3.408,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.60431654676259e-05,
|
|
"loss": 3.334,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.676258992805755e-05,
|
|
"loss": 3.389,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.748201438848921e-05,
|
|
"loss": 3.3713,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.820143884892087e-05,
|
|
"loss": 3.3702,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.892086330935252e-05,
|
|
"loss": 3.4156,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.964028776978418e-05,
|
|
"loss": 3.4023,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999999388865356e-05,
|
|
"loss": 3.3626,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.9999944997899994e-05,
|
|
"loss": 3.3751,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.999984721648846e-05,
|
|
"loss": 3.3962,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9999700544610196e-05,
|
|
"loss": 3.3803,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.999950498255203e-05,
|
|
"loss": 3.3778,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.999926053069641e-05,
|
|
"loss": 3.3897,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.99989671895214e-05,
|
|
"loss": 3.4078,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.9998624959600656e-05,
|
|
"loss": 3.3542,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.999823384160347e-05,
|
|
"loss": 3.3488,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.999779383629471e-05,
|
|
"loss": 3.3988,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.999730494453487e-05,
|
|
"loss": 3.3759,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.9996767167280034e-05,
|
|
"loss": 3.3742,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.9996180505581904e-05,
|
|
"loss": 3.3853,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.999554496058777e-05,
|
|
"loss": 3.3318,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.9994860533540526e-05,
|
|
"loss": 3.3575,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.999412722577866e-05,
|
|
"loss": 3.3645,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.999334503873624e-05,
|
|
"loss": 3.3681,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.9992513973942954e-05,
|
|
"loss": 3.3345,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.9991634033024036e-05,
|
|
"loss": 3.385,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.999070521770034e-05,
|
|
"loss": 3.343,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.998972752978828e-05,
|
|
"loss": 3.3678,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.998870097119985e-05,
|
|
"loss": 3.349,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.9987625543942635e-05,
|
|
"loss": 3.3943,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.998650125011975e-05,
|
|
"loss": 3.3375,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.9985328091929916e-05,
|
|
"loss": 3.4051,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9984106071667394e-05,
|
|
"loss": 3.3285,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9982835191722e-05,
|
|
"loss": 3.3466,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9981515454579107e-05,
|
|
"loss": 3.3341,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9980146862819634e-05,
|
|
"loss": 3.3003,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.997872941912004e-05,
|
|
"loss": 3.3813,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.9977263126252326e-05,
|
|
"loss": 3.3458,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.997574798708401e-05,
|
|
"loss": 3.3217,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.997418400457815e-05,
|
|
"loss": 3.3223,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.997257118179332e-05,
|
|
"loss": 3.3682,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.99709095218836e-05,
|
|
"loss": 3.3303,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.99691990280986e-05,
|
|
"loss": 3.3583,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.996743970378338e-05,
|
|
"loss": 3.3536,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9965631552378564e-05,
|
|
"loss": 3.363,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9963774577420205e-05,
|
|
"loss": 3.3257,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.996186878253988e-05,
|
|
"loss": 3.3474,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.9959914171464596e-05,
|
|
"loss": 3.3786,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.995791074801687e-05,
|
|
"loss": 3.3219,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.995585851611464e-05,
|
|
"loss": 3.3586,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.995375747977133e-05,
|
|
"loss": 3.3308,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.9951607643095796e-05,
|
|
"loss": 3.2885,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.99494090102923e-05,
|
|
"loss": 3.3731,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.994716158566058e-05,
|
|
"loss": 3.3311,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.994486537359575e-05,
|
|
"loss": 3.3108,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.994252037858836e-05,
|
|
"loss": 3.2939,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.994012660522436e-05,
|
|
"loss": 3.3468,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.993768405818508e-05,
|
|
"loss": 3.3452,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.993519274224724e-05,
|
|
"loss": 3.4028,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.993265266228293e-05,
|
|
"loss": 3.3021,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.993006382325962e-05,
|
|
"loss": 3.3746,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.992742623024011e-05,
|
|
"loss": 3.3682,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.992473988838257e-05,
|
|
"loss": 3.3018,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.9922004802940476e-05,
|
|
"loss": 3.3312,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.991922097926266e-05,
|
|
"loss": 3.2805,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9916388422793246e-05,
|
|
"loss": 3.329,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.991350713907167e-05,
|
|
"loss": 3.339,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.991057713373266e-05,
|
|
"loss": 3.3383,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9907598412506215e-05,
|
|
"loss": 3.3469,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9904570981217624e-05,
|
|
"loss": 3.3102,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.990149484578742e-05,
|
|
"loss": 3.2872,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.98983700122314e-05,
|
|
"loss": 3.3041,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.989519648666056e-05,
|
|
"loss": 3.362,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.989197427528116e-05,
|
|
"loss": 3.2895,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.9888703384394655e-05,
|
|
"loss": 3.3185,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.988538382039769e-05,
|
|
"loss": 3.3665,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.988201558978212e-05,
|
|
"loss": 3.2901,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.9878598699134945e-05,
|
|
"loss": 3.3005,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.9875133155138357e-05,
|
|
"loss": 3.2858,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.9871618964569666e-05,
|
|
"loss": 3.3412,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.9868056134301333e-05,
|
|
"loss": 3.2808,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.986444467130095e-05,
|
|
"loss": 3.2828,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.9860784582631184e-05,
|
|
"loss": 3.3237,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.985707587544983e-05,
|
|
"loss": 3.3013,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.9853318557009756e-05,
|
|
"loss": 3.3343,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.984951263465887e-05,
|
|
"loss": 3.301,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.9845658115840166e-05,
|
|
"loss": 3.3194,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.984175500809165e-05,
|
|
"loss": 3.3356,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9837803319046365e-05,
|
|
"loss": 3.2681,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.983380305643235e-05,
|
|
"loss": 3.2769,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.982975422807265e-05,
|
|
"loss": 3.3332,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.982565684188526e-05,
|
|
"loss": 3.2726,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.982151090588318e-05,
|
|
"loss": 3.2455,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.981731642817431e-05,
|
|
"loss": 3.2954,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.9813073416961494e-05,
|
|
"loss": 3.2877,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.980878188054252e-05,
|
|
"loss": 3.287,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.980444182731002e-05,
|
|
"loss": 3.3168,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.9800053265751555e-05,
|
|
"loss": 3.3364,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.979561620444952e-05,
|
|
"loss": 3.3212,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.979113065208117e-05,
|
|
"loss": 3.2759,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.978659661741859e-05,
|
|
"loss": 3.2946,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.9782014109328674e-05,
|
|
"loss": 3.3021,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.977738313677312e-05,
|
|
"loss": 3.3222,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.97727037088084e-05,
|
|
"loss": 3.2956,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.976797583458573e-05,
|
|
"loss": 3.3278,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.976319952335112e-05,
|
|
"loss": 3.3005,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.975837478444524e-05,
|
|
"loss": 3.314,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.975350162730351e-05,
|
|
"loss": 3.2943,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.974858006145602e-05,
|
|
"loss": 3.2535,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.974361009652753e-05,
|
|
"loss": 3.2621,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.9738591742237476e-05,
|
|
"loss": 3.2934,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9733525008399886e-05,
|
|
"loss": 3.2551,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.972840990492342e-05,
|
|
"loss": 3.3217,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9723246441811346e-05,
|
|
"loss": 3.275,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.971803462916148e-05,
|
|
"loss": 3.3001,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9712774477166204e-05,
|
|
"loss": 3.2643,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.970746599611243e-05,
|
|
"loss": 3.3422,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.9702109196381585e-05,
|
|
"loss": 3.2945,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.9696704088449605e-05,
|
|
"loss": 3.2676,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.9691250682886866e-05,
|
|
"loss": 3.2506,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.968574899035822e-05,
|
|
"loss": 3.2974,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.968019902162295e-05,
|
|
"loss": 3.247,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.967460078753475e-05,
|
|
"loss": 3.2732,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.9668954299041685e-05,
|
|
"loss": 3.2766,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.96632595671862e-05,
|
|
"loss": 3.3118,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.9657516603105104e-05,
|
|
"loss": 3.2738,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.96517254180295e-05,
|
|
"loss": 3.2563,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.964588602328481e-05,
|
|
"loss": 3.3043,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.963999843029073e-05,
|
|
"loss": 3.2722,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.9634062650561225e-05,
|
|
"loss": 3.2564,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.962807869570448e-05,
|
|
"loss": 3.2751,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.9622046577422895e-05,
|
|
"loss": 3.3186,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.961596630751308e-05,
|
|
"loss": 3.2683,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.9609837897865795e-05,
|
|
"loss": 3.2541,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.9603661360465944e-05,
|
|
"loss": 3.2493,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.9597436707392545e-05,
|
|
"loss": 3.3009,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.959116395081872e-05,
|
|
"loss": 3.2042,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.9584843103011666e-05,
|
|
"loss": 3.238,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.957847417633264e-05,
|
|
"loss": 3.302,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.9572057183236884e-05,
|
|
"loss": 3.307,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.95655921362737e-05,
|
|
"loss": 3.2456,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.95590790480863e-05,
|
|
"loss": 3.2762,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.955251793141189e-05,
|
|
"loss": 3.2395,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.95459087990816e-05,
|
|
"loss": 3.2899,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.9539251664020435e-05,
|
|
"loss": 3.2941,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.953254653924732e-05,
|
|
"loss": 3.2739,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.952579343787496e-05,
|
|
"loss": 3.2443,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.951899237310996e-05,
|
|
"loss": 3.2518,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.951214335825268e-05,
|
|
"loss": 3.2615,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.950524640669727e-05,
|
|
"loss": 3.2618,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.949830153193161e-05,
|
|
"loss": 3.2335,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.949130874753731e-05,
|
|
"loss": 3.2111,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.9484268067189685e-05,
|
|
"loss": 3.2711,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.947717950465769e-05,
|
|
"loss": 3.2634,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.947004307380394e-05,
|
|
"loss": 3.3269,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.946285878858467e-05,
|
|
"loss": 3.2958,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.945562666304967e-05,
|
|
"loss": 3.2561,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.9448346711342306e-05,
|
|
"loss": 3.2538,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.94410189476995e-05,
|
|
"loss": 3.2647,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.9433643386451625e-05,
|
|
"loss": 3.2753,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.942622004202255e-05,
|
|
"loss": 3.2721,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.941874892892963e-05,
|
|
"loss": 3.2566,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.941123006178357e-05,
|
|
"loss": 3.3152,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.94036634552885e-05,
|
|
"loss": 3.2924,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.939604912424192e-05,
|
|
"loss": 3.3046,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.938838708353464e-05,
|
|
"loss": 3.2171,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.9380677348150786e-05,
|
|
"loss": 3.3019,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.937291993316775e-05,
|
|
"loss": 3.2984,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.9365114853756164e-05,
|
|
"loss": 3.3227,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.93572621251799e-05,
|
|
"loss": 3.2673,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.934936176279598e-05,
|
|
"loss": 3.2657,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.9341413782054594e-05,
|
|
"loss": 3.1637,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.933341819849906e-05,
|
|
"loss": 3.274,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.9325375027765786e-05,
|
|
"loss": 3.2957,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.9317284285584245e-05,
|
|
"loss": 3.2735,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.9309145987776926e-05,
|
|
"loss": 3.2637,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.9300960150259354e-05,
|
|
"loss": 3.3366,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.929272678903999e-05,
|
|
"loss": 3.2555,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.9284445920220255e-05,
|
|
"loss": 3.2773,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.927611755999446e-05,
|
|
"loss": 3.228,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.926774172464982e-05,
|
|
"loss": 3.3036,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.925931843056635e-05,
|
|
"loss": 3.2746,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.9250847694216916e-05,
|
|
"loss": 3.2754,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.924232953216715e-05,
|
|
"loss": 3.2619,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.923376396107543e-05,
|
|
"loss": 3.2097,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.922515099769285e-05,
|
|
"loss": 3.301,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.921649065886318e-05,
|
|
"loss": 3.251,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.9207782961522844e-05,
|
|
"loss": 3.282,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.919902792270088e-05,
|
|
"loss": 3.293,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.919022555951892e-05,
|
|
"loss": 3.2864,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.918137588919113e-05,
|
|
"loss": 3.2326,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.917247892902419e-05,
|
|
"loss": 3.299,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.916353469641727e-05,
|
|
"loss": 3.242,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.915454320886199e-05,
|
|
"loss": 3.2348,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.9145504483942384e-05,
|
|
"loss": 3.2232,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.913641853933484e-05,
|
|
"loss": 3.256,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.912728539280813e-05,
|
|
"loss": 3.1991,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.9118105062223305e-05,
|
|
"loss": 3.2452,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.9108877565533694e-05,
|
|
"loss": 3.3276,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.9099602920784884e-05,
|
|
"loss": 3.2496,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.9090281146114655e-05,
|
|
"loss": 3.2857,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.9080912259752946e-05,
|
|
"loss": 3.2562,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.907149628002184e-05,
|
|
"loss": 3.2469,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.906203322533552e-05,
|
|
"loss": 3.2338,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.905252311420022e-05,
|
|
"loss": 3.2455,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.9042965965214205e-05,
|
|
"loss": 3.2221,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.903336179706773e-05,
|
|
"loss": 3.2349,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.9023710628543004e-05,
|
|
"loss": 3.2576,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.901401247851415e-05,
|
|
"loss": 3.2727,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.900426736594716e-05,
|
|
"loss": 3.2918,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.899447530989987e-05,
|
|
"loss": 3.2933,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.8984636329521936e-05,
|
|
"loss": 3.2786,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.897475044405476e-05,
|
|
"loss": 3.276,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.896481767283149e-05,
|
|
"loss": 3.2557,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.895483803527695e-05,
|
|
"loss": 3.2517,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.894481155090763e-05,
|
|
"loss": 3.2673,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.8934738239331634e-05,
|
|
"loss": 3.2854,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.892461812024863e-05,
|
|
"loss": 3.2277,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.891445121344983e-05,
|
|
"loss": 3.2537,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.8904237538817966e-05,
|
|
"loss": 3.1999,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.8893977116327195e-05,
|
|
"loss": 3.2162,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.888366996604312e-05,
|
|
"loss": 3.2757,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.8873316108122714e-05,
|
|
"loss": 3.2181,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.886291556281431e-05,
|
|
"loss": 3.2265,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.885246835045752e-05,
|
|
"loss": 3.2064,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.8841974491483244e-05,
|
|
"loss": 3.2932,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.883143400641359e-05,
|
|
"loss": 3.2837,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.882084691586185e-05,
|
|
"loss": 3.2177,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.881021324053247e-05,
|
|
"loss": 3.2502,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.8799533001220984e-05,
|
|
"loss": 3.2455,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.8788806218814e-05,
|
|
"loss": 3.2652,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.877803291428913e-05,
|
|
"loss": 3.2276,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.876721310871499e-05,
|
|
"loss": 3.2539,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.875634682325113e-05,
|
|
"loss": 3.2329,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.874543407914797e-05,
|
|
"loss": 3.2349,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.873447489774682e-05,
|
|
"loss": 3.2197,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.8723469300479785e-05,
|
|
"loss": 3.2482,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.871241730886976e-05,
|
|
"loss": 3.2096,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.8701318944530346e-05,
|
|
"loss": 3.246,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.869017422916584e-05,
|
|
"loss": 3.2549,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.867898318457121e-05,
|
|
"loss": 3.2463,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.866774583263198e-05,
|
|
"loss": 3.2694,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.865646219532428e-05,
|
|
"loss": 3.2059,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.8645132294714726e-05,
|
|
"loss": 3.2356,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.8633756152960416e-05,
|
|
"loss": 3.2536,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 4.862233379230889e-05,
|
|
"loss": 3.2186,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.861086523509806e-05,
|
|
"loss": 3.2559,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.8599350503756194e-05,
|
|
"loss": 3.2693,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.858778962080184e-05,
|
|
"loss": 3.2265,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.857618260884383e-05,
|
|
"loss": 3.1982,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.856452949058118e-05,
|
|
"loss": 3.2221,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.8552830288803096e-05,
|
|
"loss": 3.2693,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.8541085026388885e-05,
|
|
"loss": 3.2149,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.8529293726307954e-05,
|
|
"loss": 3.232,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.8517456411619724e-05,
|
|
"loss": 3.2457,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.85055731054736e-05,
|
|
"loss": 3.2641,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.849364383110895e-05,
|
|
"loss": 3.2265,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.848166861185503e-05,
|
|
"loss": 3.19,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.846964747113094e-05,
|
|
"loss": 3.2042,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.845758043244559e-05,
|
|
"loss": 3.1866,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.844546751939766e-05,
|
|
"loss": 3.2578,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.843330875567553e-05,
|
|
"loss": 3.2358,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.842110416505724e-05,
|
|
"loss": 3.2441,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.840885377141047e-05,
|
|
"loss": 3.2166,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.839655759869247e-05,
|
|
"loss": 3.219,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.8384215670949994e-05,
|
|
"loss": 3.2623,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.8371828012319315e-05,
|
|
"loss": 3.2096,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.83593946470261e-05,
|
|
"loss": 3.2061,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.8346915599385434e-05,
|
|
"loss": 3.2489,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 4.833439089380172e-05,
|
|
"loss": 3.1859,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 4.832182055476865e-05,
|
|
"loss": 3.2674,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 4.830920460686916e-05,
|
|
"loss": 3.2582,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 4.82965430747754e-05,
|
|
"loss": 3.2694,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.8283835983248635e-05,
|
|
"loss": 3.201,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.827108335713926e-05,
|
|
"loss": 3.2592,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.825828522138668e-05,
|
|
"loss": 3.2483,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.824544160101933e-05,
|
|
"loss": 3.2182,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 4.82325525211546e-05,
|
|
"loss": 3.228,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 4.821961800699874e-05,
|
|
"loss": 3.2215,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 4.820663808384691e-05,
|
|
"loss": 3.1826,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 4.8193612777083034e-05,
|
|
"loss": 3.2198,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 4.8180542112179795e-05,
|
|
"loss": 3.2331,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 4.8167426114698595e-05,
|
|
"loss": 3.2887,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 4.8154264810289473e-05,
|
|
"loss": 3.1379,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 4.814105822469109e-05,
|
|
"loss": 3.2432,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 4.812780638373064e-05,
|
|
"loss": 3.2247,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 4.811450931332383e-05,
|
|
"loss": 3.2408,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 4.810116703947482e-05,
|
|
"loss": 3.2435,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 4.8087779588276166e-05,
|
|
"loss": 3.2466,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 4.8074346985908784e-05,
|
|
"loss": 3.1736,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 4.806086925864187e-05,
|
|
"loss": 3.2274,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 4.80473464328329e-05,
|
|
"loss": 3.2527,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 4.803377853492751e-05,
|
|
"loss": 3.1531,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 4.80201655914595e-05,
|
|
"loss": 3.2354,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 4.800650762905076e-05,
|
|
"loss": 3.1245,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 4.799280467441122e-05,
|
|
"loss": 3.2432,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.79790567543388e-05,
|
|
"loss": 3.2156,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.7965263895719356e-05,
|
|
"loss": 3.2319,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.795142612552661e-05,
|
|
"loss": 3.2462,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.793754347082214e-05,
|
|
"loss": 3.1948,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 4.7923615958755296e-05,
|
|
"loss": 3.181,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.790964361656314e-05,
|
|
"loss": 3.2148,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.789562647157041e-05,
|
|
"loss": 3.2806,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.7881564551189466e-05,
|
|
"loss": 3.2501,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.786745788292023e-05,
|
|
"loss": 3.2016,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.785330649435014e-05,
|
|
"loss": 3.2697,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 4.783911041315408e-05,
|
|
"loss": 3.2199,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 4.782486966709434e-05,
|
|
"loss": 3.233,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 4.781058428402055e-05,
|
|
"loss": 3.2674,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 4.779625429186967e-05,
|
|
"loss": 3.2107,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 4.778187971866584e-05,
|
|
"loss": 3.2161,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 4.776746059252044e-05,
|
|
"loss": 3.1863,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 4.775299694163194e-05,
|
|
"loss": 3.3145,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 4.7738488794285906e-05,
|
|
"loss": 3.2307,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 4.7723936178854914e-05,
|
|
"loss": 3.2021,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.7709339123798494e-05,
|
|
"loss": 3.197,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.769469765766311e-05,
|
|
"loss": 3.2191,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.768001180908205e-05,
|
|
"loss": 3.2242,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.766528160677541e-05,
|
|
"loss": 3.224,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.7650507079550034e-05,
|
|
"loss": 3.2423,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 4.763568825629943e-05,
|
|
"loss": 3.2121,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 4.762082516600375e-05,
|
|
"loss": 3.204,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 4.7605917837729704e-05,
|
|
"loss": 3.1812,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 4.759096630063052e-05,
|
|
"loss": 3.2127,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 4.757597058394588e-05,
|
|
"loss": 3.2645,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 4.756093071700187e-05,
|
|
"loss": 3.2243,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 4.754584672921091e-05,
|
|
"loss": 3.1511,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 4.753071865007172e-05,
|
|
"loss": 3.1984,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 4.751554650916922e-05,
|
|
"loss": 3.1949,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.7500330336174514e-05,
|
|
"loss": 3.1599,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.7485070160844836e-05,
|
|
"loss": 3.19,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.746976601302343e-05,
|
|
"loss": 3.1638,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.745441792263956e-05,
|
|
"loss": 3.1596,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 4.743902591970843e-05,
|
|
"loss": 3.2311,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 4.74235900343311e-05,
|
|
"loss": 3.2572,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 4.740811029669448e-05,
|
|
"loss": 3.268,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 4.73925867370712e-05,
|
|
"loss": 3.1662,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 4.737701938581962e-05,
|
|
"loss": 3.2194,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 4.736140827338372e-05,
|
|
"loss": 3.2122,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 4.734575343029307e-05,
|
|
"loss": 3.2267,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 4.733005488716277e-05,
|
|
"loss": 3.1749,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 4.731431267469336e-05,
|
|
"loss": 3.2067,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 4.729852682367081e-05,
|
|
"loss": 3.2288,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.728269736496639e-05,
|
|
"loss": 3.2051,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.726682432953668e-05,
|
|
"loss": 3.2347,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.7250907748423504e-05,
|
|
"loss": 3.2455,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 4.723494765275378e-05,
|
|
"loss": 3.2045,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 4.721894407373956e-05,
|
|
"loss": 3.2157,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 4.720289704267796e-05,
|
|
"loss": 3.1917,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 4.7186806590951025e-05,
|
|
"loss": 3.1743,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 4.7170672750025736e-05,
|
|
"loss": 3.2385,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 4.715449555145394e-05,
|
|
"loss": 3.1944,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.713827502687224e-05,
|
|
"loss": 3.245,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.712201120800201e-05,
|
|
"loss": 3.2108,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.7105704126649264e-05,
|
|
"loss": 3.2386,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.708935381470463e-05,
|
|
"loss": 3.1985,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.707296030414328e-05,
|
|
"loss": 3.2304,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.705652362702486e-05,
|
|
"loss": 3.2157,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.704004381549345e-05,
|
|
"loss": 3.2249,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.7023520901777475e-05,
|
|
"loss": 3.2204,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.700695491818963e-05,
|
|
"loss": 3.1909,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.699034589712688e-05,
|
|
"loss": 3.2335,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.697369387107033e-05,
|
|
"loss": 3.185,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.695699887258519e-05,
|
|
"loss": 3.2263,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.694026093432071e-05,
|
|
"loss": 3.1998,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.692348008901011e-05,
|
|
"loss": 3.2261,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.6906656369470536e-05,
|
|
"loss": 3.2364,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.688978980860297e-05,
|
|
"loss": 3.2066,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.6872880439392156e-05,
|
|
"loss": 3.1554,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.685592829490659e-05,
|
|
"loss": 3.2672,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 4.6838933408298415e-05,
|
|
"loss": 3.2105,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 4.6821895812803315e-05,
|
|
"loss": 3.2472,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 4.680481554174058e-05,
|
|
"loss": 3.2362,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 4.678769262851288e-05,
|
|
"loss": 3.1689,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 4.677052710660631e-05,
|
|
"loss": 3.1824,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.67533190095903e-05,
|
|
"loss": 3.2347,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.6736068371117525e-05,
|
|
"loss": 3.2139,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.671877522492387e-05,
|
|
"loss": 3.2325,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.6701439604828335e-05,
|
|
"loss": 3.175,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 4.6684061544732996e-05,
|
|
"loss": 3.1917,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.66666410786229e-05,
|
|
"loss": 3.1885,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.664917824056607e-05,
|
|
"loss": 3.23,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.6631673064713347e-05,
|
|
"loss": 3.1515,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 4.6614125585298404e-05,
|
|
"loss": 3.1813,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 4.659653583663762e-05,
|
|
"loss": 3.2451,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 4.657890385313004e-05,
|
|
"loss": 3.1761,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 4.656122966925733e-05,
|
|
"loss": 3.1553,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 4.654351331958365e-05,
|
|
"loss": 3.1395,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 4.6525754838755635e-05,
|
|
"loss": 3.1984,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.6507954261502324e-05,
|
|
"loss": 3.1204,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.649011162263506e-05,
|
|
"loss": 3.1619,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.6472226957047473e-05,
|
|
"loss": 3.2132,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.645430029971534e-05,
|
|
"loss": 3.1963,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 4.643633168569659e-05,
|
|
"loss": 3.205,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.641832115013119e-05,
|
|
"loss": 3.2537,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.6400268728241106e-05,
|
|
"loss": 3.1919,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.63821744553302e-05,
|
|
"loss": 3.1815,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.636403836678419e-05,
|
|
"loss": 3.2031,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.634586049807056e-05,
|
|
"loss": 3.2011,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.6327640884738507e-05,
|
|
"loss": 3.1694,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.630937956241887e-05,
|
|
"loss": 3.2151,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_loss": 3.193521499633789,
|
|
"eval_runtime": 191.4113,
|
|
"eval_samples_per_second": 774.353,
|
|
"eval_steps_per_second": 24.199,
|
|
"step": 4632
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.629107656682405e-05,
|
|
"loss": 3.2192,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.6272731933747945e-05,
|
|
"loss": 3.1595,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 4.625434569906587e-05,
|
|
"loss": 3.1397,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 4.6235917898734526e-05,
|
|
"loss": 3.1916,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 4.6217448568791874e-05,
|
|
"loss": 3.1508,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 4.619893774535711e-05,
|
|
"loss": 3.2277,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 4.6180385464630546e-05,
|
|
"loss": 3.1812,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 4.616179176289361e-05,
|
|
"loss": 3.1803,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 4.614315667650872e-05,
|
|
"loss": 3.2129,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 4.61244802419192e-05,
|
|
"loss": 3.1907,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 4.610576249564926e-05,
|
|
"loss": 3.2011,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.608700347430392e-05,
|
|
"loss": 3.1798,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.606820321456887e-05,
|
|
"loss": 3.1975,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.6049361753210496e-05,
|
|
"loss": 3.2008,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.603047912707572e-05,
|
|
"loss": 3.2043,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 1.03,
|
|
"learning_rate": 4.6011555373091994e-05,
|
|
"loss": 3.2204,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 4.5992590528267185e-05,
|
|
"loss": 3.2014,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 4.5973584629689524e-05,
|
|
"loss": 3.2395,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 4.5954537714527534e-05,
|
|
"loss": 3.2104,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 4.593544982002994e-05,
|
|
"loss": 3.1612,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 4.591632098352562e-05,
|
|
"loss": 3.1777,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 4.5897151242423504e-05,
|
|
"loss": 3.2225,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 4.5877940634212524e-05,
|
|
"loss": 3.2241,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 4.5858689196461545e-05,
|
|
"loss": 3.1808,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 1.05,
|
|
"learning_rate": 4.583939696681926e-05,
|
|
"loss": 3.1117,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 4.582006398301414e-05,
|
|
"loss": 3.1607,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 4.580069028285437e-05,
|
|
"loss": 3.1559,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 4.578127590422774e-05,
|
|
"loss": 3.1716,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 4.576182088510161e-05,
|
|
"loss": 3.1874,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 4.5742325263522825e-05,
|
|
"loss": 3.2111,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 4.572278907761759e-05,
|
|
"loss": 3.2195,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 4.570321236559149e-05,
|
|
"loss": 3.1611,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 4.568359516572933e-05,
|
|
"loss": 3.1852,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 1.07,
|
|
"learning_rate": 4.566393751639512e-05,
|
|
"loss": 3.2029,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.5644239456031953e-05,
|
|
"loss": 3.1492,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.562450102316196e-05,
|
|
"loss": 3.1708,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.560472225638622e-05,
|
|
"loss": 3.1848,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.558490319438471e-05,
|
|
"loss": 3.1712,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 4.556504387591618e-05,
|
|
"loss": 3.1685,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 4.554514433981812e-05,
|
|
"loss": 3.1802,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 4.552520462500668e-05,
|
|
"loss": 3.1475,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 4.5505224770476576e-05,
|
|
"loss": 3.1959,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 4.548520481530102e-05,
|
|
"loss": 3.176,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 1.09,
|
|
"learning_rate": 4.5465144798631655e-05,
|
|
"loss": 3.1767,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.544504475969846e-05,
|
|
"loss": 3.1642,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.542490473780968e-05,
|
|
"loss": 3.1974,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.540472477235177e-05,
|
|
"loss": 3.1425,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 4.538450490278928e-05,
|
|
"loss": 3.2191,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 4.536424516866482e-05,
|
|
"loss": 3.1307,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 4.534394560959893e-05,
|
|
"loss": 3.1451,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 4.5323606265290045e-05,
|
|
"loss": 3.152,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 4.5303227175514426e-05,
|
|
"loss": 3.2005,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 1.11,
|
|
"learning_rate": 4.528280838012605e-05,
|
|
"loss": 3.214,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 4.5262349919056514e-05,
|
|
"loss": 3.1606,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 4.524185183231503e-05,
|
|
"loss": 3.1831,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 4.522131415998828e-05,
|
|
"loss": 3.2262,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 4.520073694224036e-05,
|
|
"loss": 3.188,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 4.518012021931271e-05,
|
|
"loss": 3.2036,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 4.515946403152403e-05,
|
|
"loss": 3.196,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 4.5138768419270184e-05,
|
|
"loss": 3.1898,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 4.511803342302415e-05,
|
|
"loss": 3.1713,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 1.13,
|
|
"learning_rate": 4.509725908333592e-05,
|
|
"loss": 3.1812,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 4.507644544083244e-05,
|
|
"loss": 3.1826,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 4.505559253621748e-05,
|
|
"loss": 3.1169,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 4.503470041027165e-05,
|
|
"loss": 3.1776,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 4.5013769103852204e-05,
|
|
"loss": 3.1778,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 4.499279865789307e-05,
|
|
"loss": 3.1548,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 4.497178911340467e-05,
|
|
"loss": 3.1902,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 4.495074051147393e-05,
|
|
"loss": 3.1998,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 4.492965289326414e-05,
|
|
"loss": 3.2263,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 4.4908526300014885e-05,
|
|
"loss": 3.174,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 4.488736077304198e-05,
|
|
"loss": 3.2044,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 4.486615635373738e-05,
|
|
"loss": 3.1525,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 4.484491308356909e-05,
|
|
"loss": 3.1919,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 4.482363100408111e-05,
|
|
"loss": 3.1455,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 4.4802310156893314e-05,
|
|
"loss": 3.1776,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 4.478095058370141e-05,
|
|
"loss": 3.163,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 4.475955232627684e-05,
|
|
"loss": 3.2097,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 4.4738115426466675e-05,
|
|
"loss": 3.2074,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 4.4716639926193595e-05,
|
|
"loss": 3.2065,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 4.4695125867455725e-05,
|
|
"loss": 3.1484,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 4.467357329232663e-05,
|
|
"loss": 3.1814,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 4.465198224295518e-05,
|
|
"loss": 3.0919,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 4.4630352761565494e-05,
|
|
"loss": 3.1578,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 1.18,
|
|
"learning_rate": 4.4608684890456845e-05,
|
|
"loss": 3.1611,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 4.458697867200359e-05,
|
|
"loss": 3.1136,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 4.456523414865507e-05,
|
|
"loss": 3.2022,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 4.454345136293554e-05,
|
|
"loss": 3.1829,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 4.45216303574441e-05,
|
|
"loss": 3.2048,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 4.4499771174854554e-05,
|
|
"loss": 3.18,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.44778738579154e-05,
|
|
"loss": 3.1876,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.445593844944969e-05,
|
|
"loss": 3.1565,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.4433964992355e-05,
|
|
"loss": 3.1507,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.441195352960328e-05,
|
|
"loss": 3.1713,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 1.2,
|
|
"learning_rate": 4.4389904104240825e-05,
|
|
"loss": 3.1927,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 4.436781675938817e-05,
|
|
"loss": 3.1955,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 4.434569153824e-05,
|
|
"loss": 3.1942,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 4.432352848406507e-05,
|
|
"loss": 3.1739,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 4.430132764020614e-05,
|
|
"loss": 3.1967,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 4.4279089050079845e-05,
|
|
"loss": 3.2066,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 4.4256812757176655e-05,
|
|
"loss": 3.1784,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 4.4234498805060783e-05,
|
|
"loss": 3.2184,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 4.421214723737007e-05,
|
|
"loss": 3.1781,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 1.22,
|
|
"learning_rate": 4.418975809781593e-05,
|
|
"loss": 3.2037,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.416733143018324e-05,
|
|
"loss": 3.1782,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.4144867278330284e-05,
|
|
"loss": 3.1577,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.4122365686188635e-05,
|
|
"loss": 3.2047,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.409982669776312e-05,
|
|
"loss": 3.1932,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 4.407725035713166e-05,
|
|
"loss": 3.2061,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 4.405463670844523e-05,
|
|
"loss": 3.1846,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 4.403198579592779e-05,
|
|
"loss": 3.2353,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 4.400929766387615e-05,
|
|
"loss": 3.1599,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 1.24,
|
|
"learning_rate": 4.398657235665991e-05,
|
|
"loss": 3.1425,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.396380991872139e-05,
|
|
"loss": 3.1801,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.39410103945755e-05,
|
|
"loss": 3.1458,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.3918173828809686e-05,
|
|
"loss": 3.1369,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.3895300266083847e-05,
|
|
"loss": 3.1809,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 4.387238975113022e-05,
|
|
"loss": 3.162,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 4.3849442328753296e-05,
|
|
"loss": 3.2025,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 4.3826458043829764e-05,
|
|
"loss": 3.2123,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 4.3803436941308405e-05,
|
|
"loss": 3.1533,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 1.26,
|
|
"learning_rate": 4.3780379066209985e-05,
|
|
"loss": 3.1517,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 4.375728446362719e-05,
|
|
"loss": 3.1786,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 4.373415317872454e-05,
|
|
"loss": 3.1428,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 4.371098525673826e-05,
|
|
"loss": 3.195,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 4.3687780742976275e-05,
|
|
"loss": 3.1393,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 4.366453968281803e-05,
|
|
"loss": 3.1745,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.3641262121714455e-05,
|
|
"loss": 3.1386,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.3617948105187864e-05,
|
|
"loss": 3.2105,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.359459767883186e-05,
|
|
"loss": 3.1338,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.357121088831124e-05,
|
|
"loss": 3.148,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 1.28,
|
|
"learning_rate": 4.3547787779361955e-05,
|
|
"loss": 3.1411,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 4.352432839779093e-05,
|
|
"loss": 3.1104,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 4.350083278947606e-05,
|
|
"loss": 3.1404,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 4.3477301000366075e-05,
|
|
"loss": 3.1452,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 4.3453733076480466e-05,
|
|
"loss": 3.1881,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.343012906390937e-05,
|
|
"loss": 3.1552,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.3406489008813535e-05,
|
|
"loss": 3.2142,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.338281295742417e-05,
|
|
"loss": 3.1566,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.3359100956042885e-05,
|
|
"loss": 3.1937,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 4.3335353051041606e-05,
|
|
"loss": 3.2105,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 4.331156928886245e-05,
|
|
"loss": 3.1797,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 4.328774971601767e-05,
|
|
"loss": 3.1629,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 4.326389437908956e-05,
|
|
"loss": 3.2062,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 4.324000332473035e-05,
|
|
"loss": 3.2231,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 1.31,
|
|
"learning_rate": 4.3216076599662114e-05,
|
|
"loss": 3.1946,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 4.319211425067668e-05,
|
|
"loss": 3.1879,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 4.3168116324635556e-05,
|
|
"loss": 3.1352,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 4.314408286846982e-05,
|
|
"loss": 3.1791,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 4.3120013929180055e-05,
|
|
"loss": 3.1685,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.309590955383619e-05,
|
|
"loss": 3.1246,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.307176978957748e-05,
|
|
"loss": 3.1601,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.30475946836124e-05,
|
|
"loss": 3.1144,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.3023384283218525e-05,
|
|
"loss": 3.1496,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 4.299913863574245e-05,
|
|
"loss": 3.2308,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.297485778859971e-05,
|
|
"loss": 3.1532,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.295054178927468e-05,
|
|
"loss": 3.1449,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.2926190685320454e-05,
|
|
"loss": 3.1575,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.290180452435881e-05,
|
|
"loss": 3.1471,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.287738335408007e-05,
|
|
"loss": 3.1524,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 4.285292722224302e-05,
|
|
"loss": 3.143,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 4.282843617667481e-05,
|
|
"loss": 3.1605,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 4.2803910265270905e-05,
|
|
"loss": 3.1333,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 1.35,
|
|
"learning_rate": 4.2779349535994907e-05,
|
|
"loss": 3.1821,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.2754754036878534e-05,
|
|
"loss": 3.177,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.273012381602151e-05,
|
|
"loss": 3.1561,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.270545892159142e-05,
|
|
"loss": 3.1645,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.2680759401823724e-05,
|
|
"loss": 3.1379,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.265602530502153e-05,
|
|
"loss": 3.1522,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 4.263125667955561e-05,
|
|
"loss": 3.144,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 4.2606453573864236e-05,
|
|
"loss": 3.1868,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 4.258161603645313e-05,
|
|
"loss": 3.1529,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 1.37,
|
|
"learning_rate": 4.255674411589534e-05,
|
|
"loss": 3.1846,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.2531837860831154e-05,
|
|
"loss": 3.1615,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.2506897319968e-05,
|
|
"loss": 3.1571,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.2481922542080375e-05,
|
|
"loss": 3.1169,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.245691357600971e-05,
|
|
"loss": 3.1826,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.2431870470664314e-05,
|
|
"loss": 3.1751,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 4.240679327501924e-05,
|
|
"loss": 3.1996,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 4.238168203811623e-05,
|
|
"loss": 3.1304,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 4.235653680906358e-05,
|
|
"loss": 3.1716,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 4.233135763703607e-05,
|
|
"loss": 3.1825,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 1.39,
|
|
"learning_rate": 4.230614457127488e-05,
|
|
"loss": 3.1719,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.228089766108742e-05,
|
|
"loss": 3.15,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.225561695584733e-05,
|
|
"loss": 3.1698,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.2230302504994355e-05,
|
|
"loss": 3.1423,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.220495435803419e-05,
|
|
"loss": 3.2055,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 4.217957256453844e-05,
|
|
"loss": 3.1394,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 4.215415717414454e-05,
|
|
"loss": 3.1618,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 4.2128708236555584e-05,
|
|
"loss": 3.1124,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 4.2103225801540314e-05,
|
|
"loss": 3.1919,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 1.41,
|
|
"learning_rate": 4.207770991893294e-05,
|
|
"loss": 3.1607,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 4.205216063863312e-05,
|
|
"loss": 3.1826,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 4.2026578010605796e-05,
|
|
"loss": 3.1262,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 4.2000962084881154e-05,
|
|
"loss": 3.1614,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 4.197531291155447e-05,
|
|
"loss": 3.1091,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 4.1949630540786055e-05,
|
|
"loss": 3.1554,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.192391502280114e-05,
|
|
"loss": 3.1163,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.1898166407889785e-05,
|
|
"loss": 3.1649,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.187238474640675e-05,
|
|
"loss": 3.1235,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 4.184657008877148e-05,
|
|
"loss": 3.1343,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 4.1820722485467865e-05,
|
|
"loss": 3.1873,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 4.179484198704431e-05,
|
|
"loss": 3.1574,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 4.176892864411348e-05,
|
|
"loss": 3.1684,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 4.174298250735232e-05,
|
|
"loss": 3.2082,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 1.44,
|
|
"learning_rate": 4.171700362750188e-05,
|
|
"loss": 3.1431,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 4.169099205536725e-05,
|
|
"loss": 3.116,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 4.166494784181747e-05,
|
|
"loss": 3.1468,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 4.163887103778539e-05,
|
|
"loss": 3.1494,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 4.161276169426762e-05,
|
|
"loss": 3.1662,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 4.158661986232437e-05,
|
|
"loss": 3.1573,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 4.1560445593079424e-05,
|
|
"loss": 3.1701,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 4.1534238937719984e-05,
|
|
"loss": 3.1408,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 4.150799994749658e-05,
|
|
"loss": 3.1655,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 1.46,
|
|
"learning_rate": 4.148172867372299e-05,
|
|
"loss": 3.1663,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 4.145542516777612e-05,
|
|
"loss": 3.1723,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 4.1429089481095906e-05,
|
|
"loss": 3.1329,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 4.140272166518523e-05,
|
|
"loss": 3.133,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 4.13763217716098e-05,
|
|
"loss": 3.1302,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 4.134988985199806e-05,
|
|
"loss": 3.118,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 4.132342595804108e-05,
|
|
"loss": 3.164,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 4.129693014149245e-05,
|
|
"loss": 3.1411,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 4.127040245416821e-05,
|
|
"loss": 3.1414,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 1.48,
|
|
"learning_rate": 4.124384294794672e-05,
|
|
"loss": 3.1436,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 4.121725167476855e-05,
|
|
"loss": 3.2189,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 4.1190628686636425e-05,
|
|
"loss": 3.1578,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 4.116397403561507e-05,
|
|
"loss": 3.1702,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 4.113728777383112e-05,
|
|
"loss": 3.169,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 4.111056995347308e-05,
|
|
"loss": 3.1692,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.1083820626791116e-05,
|
|
"loss": 3.168,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.105703984609702e-05,
|
|
"loss": 3.1242,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.1030227663764135e-05,
|
|
"loss": 3.2433,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.100338413222716e-05,
|
|
"loss": 3.187,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"learning_rate": 4.0976509303982135e-05,
|
|
"loss": 3.1663,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 4.09496032315863e-05,
|
|
"loss": 3.1137,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 4.0922665967658006e-05,
|
|
"loss": 3.1161,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 4.089569756487657e-05,
|
|
"loss": 3.1972,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 4.086869807598223e-05,
|
|
"loss": 3.1001,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 4.084166755377603e-05,
|
|
"loss": 3.1692,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 4.081460605111966e-05,
|
|
"loss": 3.1473,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 4.078751362093545e-05,
|
|
"loss": 3.1822,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 4.076039031620618e-05,
|
|
"loss": 3.1269,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 1.52,
|
|
"learning_rate": 4.0733236189975e-05,
|
|
"loss": 3.1275,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.070605129534536e-05,
|
|
"loss": 3.1044,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.067883568548088e-05,
|
|
"loss": 3.1716,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.065158941360523e-05,
|
|
"loss": 3.1244,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.062431253300205e-05,
|
|
"loss": 3.1419,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 4.059700509701485e-05,
|
|
"loss": 3.1206,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 4.05696671590469e-05,
|
|
"loss": 3.0614,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 4.054229877256108e-05,
|
|
"loss": 3.1817,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 4.0514899991079876e-05,
|
|
"loss": 3.1441,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 1.54,
|
|
"learning_rate": 4.048747086818516e-05,
|
|
"loss": 3.148,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.046001145751818e-05,
|
|
"loss": 3.1507,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.043252181277939e-05,
|
|
"loss": 3.1917,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.040500198772838e-05,
|
|
"loss": 3.1355,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.037745203618377e-05,
|
|
"loss": 3.1637,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 4.0349872012023085e-05,
|
|
"loss": 3.1545,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.032226196918267e-05,
|
|
"loss": 3.1878,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.029462196165756e-05,
|
|
"loss": 3.1394,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.026695204350142e-05,
|
|
"loss": 3.1647,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 4.023925226882636e-05,
|
|
"loss": 3.1424,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.0211522691802924e-05,
|
|
"loss": 3.0862,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.0183763366659934e-05,
|
|
"loss": 3.1946,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.0155974347684353e-05,
|
|
"loss": 3.1213,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.012815568922125e-05,
|
|
"loss": 3.1879,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 1.57,
|
|
"learning_rate": 4.010030744567365e-05,
|
|
"loss": 3.1317,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.007242967150242e-05,
|
|
"loss": 3.1562,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.0044522421226184e-05,
|
|
"loss": 3.137,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 4.001658574942123e-05,
|
|
"loss": 3.1713,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 3.998861971072136e-05,
|
|
"loss": 3.182,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 3.996062435981782e-05,
|
|
"loss": 3.1464,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 3.993259975145917e-05,
|
|
"loss": 3.1696,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 3.99045459404512e-05,
|
|
"loss": 3.1995,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 3.9876462981656806e-05,
|
|
"loss": 3.1175,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 1.59,
|
|
"learning_rate": 3.984835092999586e-05,
|
|
"loss": 3.1136,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 3.982020984044517e-05,
|
|
"loss": 3.1157,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 3.979203976803832e-05,
|
|
"loss": 3.1381,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 3.976384076786554e-05,
|
|
"loss": 3.1718,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 3.973561289507366e-05,
|
|
"loss": 3.1321,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 3.9707356204865996e-05,
|
|
"loss": 3.137,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 3.967907075250219e-05,
|
|
"loss": 3.1893,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 3.965075659329813e-05,
|
|
"loss": 3.1326,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 3.9622413782625854e-05,
|
|
"loss": 3.1343,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 3.959404237591344e-05,
|
|
"loss": 3.1807,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 1.61,
|
|
"learning_rate": 3.9565642428644866e-05,
|
|
"loss": 3.1216,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 3.953721399635995e-05,
|
|
"loss": 3.1246,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 3.9508757134654196e-05,
|
|
"loss": 3.1527,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 3.9480271899178724e-05,
|
|
"loss": 3.124,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 3.945175834564011e-05,
|
|
"loss": 3.1283,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 3.942321652980037e-05,
|
|
"loss": 3.1565,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 3.939464650747672e-05,
|
|
"loss": 3.1486,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 3.9366048334541585e-05,
|
|
"loss": 3.1738,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 3.933742206692243e-05,
|
|
"loss": 3.1078,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 1.63,
|
|
"learning_rate": 3.930876776060164e-05,
|
|
"loss": 3.1508,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 3.928008547161648e-05,
|
|
"loss": 3.1255,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 3.925137525605891e-05,
|
|
"loss": 3.1683,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 3.922263717007549e-05,
|
|
"loss": 3.1328,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 3.919387126986731e-05,
|
|
"loss": 3.1678,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 3.916507761168984e-05,
|
|
"loss": 3.1351,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 3.913625625185285e-05,
|
|
"loss": 3.1499,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 3.910740724672027e-05,
|
|
"loss": 3.2316,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 3.90785306527101e-05,
|
|
"loss": 3.0708,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 1.65,
|
|
"learning_rate": 3.904962652629428e-05,
|
|
"loss": 3.1177,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 3.9020694923998624e-05,
|
|
"loss": 3.1329,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 3.899173590240264e-05,
|
|
"loss": 3.0913,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 3.8962749518139486e-05,
|
|
"loss": 3.1613,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 3.893373582789582e-05,
|
|
"loss": 3.1215,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 3.890469488841171e-05,
|
|
"loss": 3.1652,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 3.88756267564805e-05,
|
|
"loss": 3.1852,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 3.884653148894871e-05,
|
|
"loss": 3.1674,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 3.881740914271596e-05,
|
|
"loss": 3.1452,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 1.67,
|
|
"learning_rate": 3.878825977473478e-05,
|
|
"loss": 3.1016,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 3.8759083442010584e-05,
|
|
"loss": 3.1139,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 3.872988020160149e-05,
|
|
"loss": 3.1258,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 3.870065011061825e-05,
|
|
"loss": 3.1106,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 3.867139322622413e-05,
|
|
"loss": 3.1516,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 3.864210960563478e-05,
|
|
"loss": 3.1169,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 3.8612799306118156e-05,
|
|
"loss": 3.1084,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 3.8583462384994374e-05,
|
|
"loss": 3.1573,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 3.85540988996356e-05,
|
|
"loss": 3.1087,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 3.852470890746599e-05,
|
|
"loss": 3.1467,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 3.849529246596151e-05,
|
|
"loss": 3.1378,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 3.846584963264983e-05,
|
|
"loss": 3.1572,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 3.843638046511028e-05,
|
|
"loss": 3.1425,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 3.840688502097365e-05,
|
|
"loss": 3.1609,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 1.7,
|
|
"learning_rate": 3.8377363357922156e-05,
|
|
"loss": 3.0867,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 3.834781553368924e-05,
|
|
"loss": 3.1533,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 3.831824160605955e-05,
|
|
"loss": 3.1032,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 3.828864163286875e-05,
|
|
"loss": 3.1066,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 3.8259015672003464e-05,
|
|
"loss": 3.1348,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 3.822936378140113e-05,
|
|
"loss": 3.2036,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 3.8199686019049894e-05,
|
|
"loss": 3.181,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 3.816998244298849e-05,
|
|
"loss": 3.1022,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 3.814025311130614e-05,
|
|
"loss": 3.1287,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 3.8110498082142445e-05,
|
|
"loss": 3.1466,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 1.72,
|
|
"learning_rate": 3.808071741368723e-05,
|
|
"loss": 3.1357,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.8050911164180507e-05,
|
|
"loss": 3.1281,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.802107939191228e-05,
|
|
"loss": 3.1201,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.7991222155222484e-05,
|
|
"loss": 3.1409,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 3.796133951250083e-05,
|
|
"loss": 3.1032,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.793143152218673e-05,
|
|
"loss": 3.1526,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.790149824276919e-05,
|
|
"loss": 3.1561,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.787153973278662e-05,
|
|
"loss": 3.1016,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.784155605082681e-05,
|
|
"loss": 3.1152,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 1.74,
|
|
"learning_rate": 3.781154725552677e-05,
|
|
"loss": 3.1269,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.778151340557261e-05,
|
|
"loss": 3.1753,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.775145455969946e-05,
|
|
"loss": 3.1708,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.7721370776691305e-05,
|
|
"loss": 3.1453,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 3.769126211538092e-05,
|
|
"loss": 3.1866,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.7661128634649737e-05,
|
|
"loss": 3.1268,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.763097039342768e-05,
|
|
"loss": 3.1154,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.760078745069316e-05,
|
|
"loss": 3.1447,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.757057986547285e-05,
|
|
"loss": 3.1837,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 1.76,
|
|
"learning_rate": 3.754034769684164e-05,
|
|
"loss": 3.1581,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.751009100392247e-05,
|
|
"loss": 3.126,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.747980984588626e-05,
|
|
"loss": 3.0992,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.744950428195178e-05,
|
|
"loss": 3.1455,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.7419174371385504e-05,
|
|
"loss": 3.128,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 3.7388820173501545e-05,
|
|
"loss": 3.0823,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.735844174766149e-05,
|
|
"loss": 3.1004,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.732803915327434e-05,
|
|
"loss": 3.1647,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.729761244979631e-05,
|
|
"loss": 3.1464,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 1.78,
|
|
"learning_rate": 3.726716169673082e-05,
|
|
"loss": 3.1272,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.723668695362827e-05,
|
|
"loss": 3.1407,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.720618828008604e-05,
|
|
"loss": 3.1695,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.717566573574822e-05,
|
|
"loss": 3.1004,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.7145119380305674e-05,
|
|
"loss": 3.1826,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 3.7114549273495764e-05,
|
|
"loss": 3.1372,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.708395547510234e-05,
|
|
"loss": 3.1596,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.7053338044955566e-05,
|
|
"loss": 3.183,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.702269704293182e-05,
|
|
"loss": 3.1429,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.69920325289536e-05,
|
|
"loss": 3.1234,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"learning_rate": 3.6961344562989354e-05,
|
|
"loss": 3.1404,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.6930633205053414e-05,
|
|
"loss": 3.1519,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.6899898515205856e-05,
|
|
"loss": 3.174,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.6869140553552374e-05,
|
|
"loss": 3.147,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 3.683835938024418e-05,
|
|
"loss": 3.1018,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.680755505547788e-05,
|
|
"loss": 3.1384,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.677672763949536e-05,
|
|
"loss": 3.0691,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.674587719258365e-05,
|
|
"loss": 3.1424,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.671500377507482e-05,
|
|
"loss": 3.144,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 3.668410744734589e-05,
|
|
"loss": 3.1379,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.665318826981867e-05,
|
|
"loss": 3.1232,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.662224630295963e-05,
|
|
"loss": 3.12,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.659128160727983e-05,
|
|
"loss": 3.1597,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.656029424333479e-05,
|
|
"loss": 3.1177,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.652928427172434e-05,
|
|
"loss": 3.156,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.649825175309253e-05,
|
|
"loss": 3.128,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.646719674812751e-05,
|
|
"loss": 3.1341,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.643611931756139e-05,
|
|
"loss": 3.1123,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 3.640501952217015e-05,
|
|
"loss": 3.1392,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.637389742277348e-05,
|
|
"loss": 3.132,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.6342753080234754e-05,
|
|
"loss": 3.0704,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.6311586555460776e-05,
|
|
"loss": 3.1512,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.6280397909401756e-05,
|
|
"loss": 3.081,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 1.85,
|
|
"learning_rate": 3.624918720305117e-05,
|
|
"loss": 3.1163,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.621795449744562e-05,
|
|
"loss": 3.096,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.6186699853664755e-05,
|
|
"loss": 3.0679,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.615542333283112e-05,
|
|
"loss": 3.1313,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 3.6124124996110015e-05,
|
|
"loss": 3.0716,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.609280490470944e-05,
|
|
"loss": 3.0907,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.6061463119879915e-05,
|
|
"loss": 3.136,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.60300997029144e-05,
|
|
"loss": 3.1074,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.5998714715148165e-05,
|
|
"loss": 3.0909,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 1.87,
|
|
"learning_rate": 3.596730821795863e-05,
|
|
"loss": 3.0746,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.5935880272765325e-05,
|
|
"loss": 3.1502,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.590443094102969e-05,
|
|
"loss": 3.1832,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.5872960284255e-05,
|
|
"loss": 3.1386,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.584146836398624e-05,
|
|
"loss": 3.1602,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 3.5809955241809964e-05,
|
|
"loss": 3.1216,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.5778420979354216e-05,
|
|
"loss": 3.1161,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.5746865638288344e-05,
|
|
"loss": 3.1314,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.571528928032296e-05,
|
|
"loss": 3.0919,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 1.89,
|
|
"learning_rate": 3.5683691967209745e-05,
|
|
"loss": 3.1507,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.565207376074138e-05,
|
|
"loss": 3.1514,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.562043472275139e-05,
|
|
"loss": 3.1263,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.558877491511405e-05,
|
|
"loss": 3.1518,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.555709439974424e-05,
|
|
"loss": 3.1468,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 3.552539323859736e-05,
|
|
"loss": 3.1016,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.549367149366916e-05,
|
|
"loss": 3.1782,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.5461929226995675e-05,
|
|
"loss": 3.1095,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.5430166500653025e-05,
|
|
"loss": 3.0431,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.5398383376757385e-05,
|
|
"loss": 3.1664,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 1.91,
|
|
"learning_rate": 3.5366579917464804e-05,
|
|
"loss": 3.1158,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.533475618497111e-05,
|
|
"loss": 3.071,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.5302912241511756e-05,
|
|
"loss": 3.1498,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.5271048149361744e-05,
|
|
"loss": 3.0992,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 3.523916397083546e-05,
|
|
"loss": 3.1064,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.520725976828658e-05,
|
|
"loss": 3.1639,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.5175335604107934e-05,
|
|
"loss": 3.1616,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.5143391540731415e-05,
|
|
"loss": 3.1242,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.5111427640627794e-05,
|
|
"loss": 3.075,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 1.93,
|
|
"learning_rate": 3.507944396630666e-05,
|
|
"loss": 3.0899,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 3.504744058031625e-05,
|
|
"loss": 3.1955,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 3.501541754524339e-05,
|
|
"loss": 3.1204,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 3.49833749237133e-05,
|
|
"loss": 3.1625,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 3.4951312778389504e-05,
|
|
"loss": 3.1284,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 3.4919231171973724e-05,
|
|
"loss": 3.0868,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 3.488713016720573e-05,
|
|
"loss": 3.1107,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 3.4855009826863225e-05,
|
|
"loss": 3.1362,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 3.482287021376173e-05,
|
|
"loss": 3.0686,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 1.95,
|
|
"learning_rate": 3.479071139075446e-05,
|
|
"loss": 3.1225,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 3.4758533420732195e-05,
|
|
"loss": 3.156,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 3.472633636662316e-05,
|
|
"loss": 3.1497,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 3.469412029139289e-05,
|
|
"loss": 3.0961,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 3.466188525804414e-05,
|
|
"loss": 3.0989,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 3.462963132961672e-05,
|
|
"loss": 3.1339,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 3.459735856918741e-05,
|
|
"loss": 3.1381,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 3.45650670398698e-05,
|
|
"loss": 3.104,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 3.453275680481419e-05,
|
|
"loss": 3.1142,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 3.450042792720748e-05,
|
|
"loss": 3.1381,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 3.4468080470273e-05,
|
|
"loss": 3.0822,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 3.443571449727042e-05,
|
|
"loss": 3.1343,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 3.440333007149565e-05,
|
|
"loss": 3.1531,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 3.4370927256280654e-05,
|
|
"loss": 3.0654,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 1.98,
|
|
"learning_rate": 3.433850611499336e-05,
|
|
"loss": 3.1617,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 3.4306066711037566e-05,
|
|
"loss": 3.1418,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 3.427360910785275e-05,
|
|
"loss": 3.0834,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 3.424113336891399e-05,
|
|
"loss": 3.1146,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 3.420863955773184e-05,
|
|
"loss": 3.1358,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 3.4176127737852196e-05,
|
|
"loss": 3.1704,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 3.4143597972856176e-05,
|
|
"loss": 3.1462,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 3.411105032635996e-05,
|
|
"loss": 3.176,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_loss": 3.116755962371826,
|
|
"eval_runtime": 192.2124,
|
|
"eval_samples_per_second": 771.126,
|
|
"eval_steps_per_second": 24.098,
|
|
"step": 9264
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 3.407848486201474e-05,
|
|
"loss": 3.1164,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"learning_rate": 3.404590164350654e-05,
|
|
"loss": 3.1071,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 3.401330073455609e-05,
|
|
"loss": 3.104,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 3.3980682198918724e-05,
|
|
"loss": 3.1238,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 3.394804610038425e-05,
|
|
"loss": 3.1772,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 3.391539250277683e-05,
|
|
"loss": 3.0895,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 2.01,
|
|
"learning_rate": 3.3882721469954836e-05,
|
|
"loss": 3.1571,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 3.385003306581074e-05,
|
|
"loss": 3.0622,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 3.381732735427098e-05,
|
|
"loss": 3.0843,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 3.378460439929585e-05,
|
|
"loss": 3.1206,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 2.02,
|
|
"learning_rate": 3.3751864264879365e-05,
|
|
"loss": 3.1425,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 3.371910701504913e-05,
|
|
"loss": 3.1208,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 3.368633271386624e-05,
|
|
"loss": 3.1281,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 3.36535414254251e-05,
|
|
"loss": 3.11,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 3.3620733213853375e-05,
|
|
"loss": 3.1245,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 2.03,
|
|
"learning_rate": 3.3587908143311794e-05,
|
|
"loss": 3.0622,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 3.3555066277994086e-05,
|
|
"loss": 3.1447,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 3.3522207682126794e-05,
|
|
"loss": 3.1086,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 3.3489332419969214e-05,
|
|
"loss": 3.1308,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 3.345644055581319e-05,
|
|
"loss": 3.1152,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 2.04,
|
|
"learning_rate": 3.342353215398307e-05,
|
|
"loss": 3.1107,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 3.339060727883554e-05,
|
|
"loss": 3.1278,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 3.335766599475948e-05,
|
|
"loss": 3.1098,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 3.332470836617589e-05,
|
|
"loss": 3.1251,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 2.05,
|
|
"learning_rate": 3.329173445753771e-05,
|
|
"loss": 3.0778,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 3.325874433332972e-05,
|
|
"loss": 3.1043,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 3.3225738058068425e-05,
|
|
"loss": 3.0784,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 3.3192715696301895e-05,
|
|
"loss": 3.1185,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 3.315967731260969e-05,
|
|
"loss": 3.0843,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 2.06,
|
|
"learning_rate": 3.312662297160267e-05,
|
|
"loss": 3.0896,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 3.309355273792292e-05,
|
|
"loss": 3.1469,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 3.306046667624361e-05,
|
|
"loss": 3.0886,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 3.302736485126885e-05,
|
|
"loss": 3.0841,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 3.2994247327733566e-05,
|
|
"loss": 3.1063,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 2.07,
|
|
"learning_rate": 3.2961114170403436e-05,
|
|
"loss": 3.0773,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 3.2927965444074646e-05,
|
|
"loss": 3.0848,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 3.289480121357388e-05,
|
|
"loss": 3.1147,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 3.286162154375811e-05,
|
|
"loss": 3.1065,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 2.08,
|
|
"learning_rate": 3.282842649951451e-05,
|
|
"loss": 3.0612,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 3.279521614576034e-05,
|
|
"loss": 3.1294,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 3.276199054744279e-05,
|
|
"loss": 3.0764,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 3.272874976953883e-05,
|
|
"loss": 3.1182,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 3.269549387705517e-05,
|
|
"loss": 3.1077,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 2.09,
|
|
"learning_rate": 3.2662222935028036e-05,
|
|
"loss": 3.0921,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 3.2628937008523106e-05,
|
|
"loss": 3.1092,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 3.259563616263536e-05,
|
|
"loss": 3.0678,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 3.256232046248895e-05,
|
|
"loss": 3.1179,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 3.252898997323707e-05,
|
|
"loss": 3.1153,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 2.1,
|
|
"learning_rate": 3.249564476006187e-05,
|
|
"loss": 3.0629,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 3.246228488817424e-05,
|
|
"loss": 3.1144,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 3.2428910422813786e-05,
|
|
"loss": 3.0786,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 3.239552142924862e-05,
|
|
"loss": 3.1056,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 2.11,
|
|
"learning_rate": 3.23621179727753e-05,
|
|
"loss": 3.1511,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 3.232870011871863e-05,
|
|
"loss": 3.0994,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 3.22952679324316e-05,
|
|
"loss": 3.1298,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 3.2261821479295214e-05,
|
|
"loss": 3.1427,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 3.222836082471838e-05,
|
|
"loss": 3.1254,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 2.12,
|
|
"learning_rate": 3.219488603413777e-05,
|
|
"loss": 3.0876,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 3.2161397173017727e-05,
|
|
"loss": 3.0734,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 3.2127894306850084e-05,
|
|
"loss": 3.0834,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 3.209437750115407e-05,
|
|
"loss": 3.1251,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 2.13,
|
|
"learning_rate": 3.206084682147617e-05,
|
|
"loss": 3.109,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 3.202730233339002e-05,
|
|
"loss": 3.1238,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 3.199374410249625e-05,
|
|
"loss": 3.0921,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 3.1960172194422355e-05,
|
|
"loss": 3.0811,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 3.192658667482259e-05,
|
|
"loss": 3.1357,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 2.14,
|
|
"learning_rate": 3.189298760937782e-05,
|
|
"loss": 3.1182,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 3.185937506379542e-05,
|
|
"loss": 3.1047,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 3.18257491038091e-05,
|
|
"loss": 3.1334,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 3.1792109795178825e-05,
|
|
"loss": 3.1351,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 3.1758457203690655e-05,
|
|
"loss": 3.1448,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 2.15,
|
|
"learning_rate": 3.1724791395156625e-05,
|
|
"loss": 3.1141,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.169111243541462e-05,
|
|
"loss": 3.1356,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.165742039032825e-05,
|
|
"loss": 3.1419,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.1623715325786715e-05,
|
|
"loss": 3.1144,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 3.158999730770465e-05,
|
|
"loss": 3.1057,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.155626640202207e-05,
|
|
"loss": 3.0705,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.152252267470416e-05,
|
|
"loss": 3.1277,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.148876619174117e-05,
|
|
"loss": 3.0692,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.145499701914833e-05,
|
|
"loss": 3.1255,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 2.17,
|
|
"learning_rate": 3.142121522296566e-05,
|
|
"loss": 3.102,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.138742086925788e-05,
|
|
"loss": 3.1029,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.1353614024114244e-05,
|
|
"loss": 3.094,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.131979475364848e-05,
|
|
"loss": 3.0889,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.128596312399858e-05,
|
|
"loss": 3.1127,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 2.18,
|
|
"learning_rate": 3.1252119201326705e-05,
|
|
"loss": 3.1553,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.121826305181909e-05,
|
|
"loss": 3.0632,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.1184394741685816e-05,
|
|
"loss": 3.107,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.115051433716083e-05,
|
|
"loss": 3.0981,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 2.19,
|
|
"learning_rate": 3.111662190450168e-05,
|
|
"loss": 3.1207,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.1082717509989416e-05,
|
|
"loss": 3.0722,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.104880121992855e-05,
|
|
"loss": 3.1075,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.101487310064678e-05,
|
|
"loss": 3.096,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.0980933218495005e-05,
|
|
"loss": 3.1503,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 2.2,
|
|
"learning_rate": 3.0946981639847084e-05,
|
|
"loss": 3.0735,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.091301843109977e-05,
|
|
"loss": 3.0709,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.087904365867254e-05,
|
|
"loss": 3.1344,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.084505738900753e-05,
|
|
"loss": 3.1359,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 2.21,
|
|
"learning_rate": 3.0811059688569287e-05,
|
|
"loss": 3.0743,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.077705062384479e-05,
|
|
"loss": 3.1118,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.074303026134319e-05,
|
|
"loss": 3.138,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.070899866759575e-05,
|
|
"loss": 3.0892,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.06749559091557e-05,
|
|
"loss": 3.0744,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 2.22,
|
|
"learning_rate": 3.064090205259811e-05,
|
|
"loss": 3.1033,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.060683716451973e-05,
|
|
"loss": 3.133,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.0572761311538914e-05,
|
|
"loss": 3.1381,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.0538674560295423e-05,
|
|
"loss": 3.0978,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.0504576977450367e-05,
|
|
"loss": 3.0755,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 2.23,
|
|
"learning_rate": 3.0470468629686016e-05,
|
|
"loss": 3.1048,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.0436349583705704e-05,
|
|
"loss": 3.159,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.0402219906233676e-05,
|
|
"loss": 3.0511,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.036807966401498e-05,
|
|
"loss": 3.1379,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 2.24,
|
|
"learning_rate": 3.0333928923815326e-05,
|
|
"loss": 3.1011,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.0299767752420926e-05,
|
|
"loss": 3.1319,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.026559621663843e-05,
|
|
"loss": 3.1408,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.0231414383294736e-05,
|
|
"loss": 3.092,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.019722231923689e-05,
|
|
"loss": 3.1126,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"learning_rate": 3.0163020091331928e-05,
|
|
"loss": 3.1085,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.0128807766466776e-05,
|
|
"loss": 3.0773,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.009458541154811e-05,
|
|
"loss": 3.13,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.0060353093502215e-05,
|
|
"loss": 3.1377,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 3.0026110879274854e-05,
|
|
"loss": 3.1241,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 2.26,
|
|
"learning_rate": 2.9991858835831155e-05,
|
|
"loss": 3.0861,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 2.9957597030155455e-05,
|
|
"loss": 3.1324,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 2.99233255292512e-05,
|
|
"loss": 3.0698,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 2.9889044400140785e-05,
|
|
"loss": 3.0967,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 2.27,
|
|
"learning_rate": 2.9854753709865434e-05,
|
|
"loss": 3.0755,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.982045352548507e-05,
|
|
"loss": 3.12,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9786143914078184e-05,
|
|
"loss": 3.0749,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9751824942741708e-05,
|
|
"loss": 3.1039,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9717496678590868e-05,
|
|
"loss": 3.129,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 2.28,
|
|
"learning_rate": 2.9683159188759065e-05,
|
|
"loss": 3.0406,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9648812540397746e-05,
|
|
"loss": 3.0813,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9614456800676276e-05,
|
|
"loss": 3.1081,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9580092036781792e-05,
|
|
"loss": 3.0899,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9545718315919074e-05,
|
|
"loss": 3.1271,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 2.29,
|
|
"learning_rate": 2.9511335705310416e-05,
|
|
"loss": 3.1272,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.9476944272195518e-05,
|
|
"loss": 3.1155,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.9442544083831307e-05,
|
|
"loss": 3.1526,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.9408135207491853e-05,
|
|
"loss": 3.1156,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 2.3,
|
|
"learning_rate": 2.9373717710468213e-05,
|
|
"loss": 3.0691,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.9339291660068284e-05,
|
|
"loss": 3.1182,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.9304857123616714e-05,
|
|
"loss": 3.0618,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.927041416845473e-05,
|
|
"loss": 3.1102,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.9235962861940035e-05,
|
|
"loss": 3.13,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 2.31,
|
|
"learning_rate": 2.9201503271446666e-05,
|
|
"loss": 3.0638,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.916703546436484e-05,
|
|
"loss": 3.1512,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.9132559508100866e-05,
|
|
"loss": 3.1324,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.909807547007697e-05,
|
|
"loss": 3.1095,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 2.90635834177312e-05,
|
|
"loss": 3.0785,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.9029083418517268e-05,
|
|
"loss": 3.0682,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.899457553990442e-05,
|
|
"loss": 3.0904,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.896005984937734e-05,
|
|
"loss": 3.1123,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.892553641443595e-05,
|
|
"loss": 3.063,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 2.33,
|
|
"learning_rate": 2.8891005302595346e-05,
|
|
"loss": 3.1219,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.8856466581385628e-05,
|
|
"loss": 3.1406,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.8821920318351774e-05,
|
|
"loss": 3.1197,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.878736658105352e-05,
|
|
"loss": 3.1293,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.87528054370652e-05,
|
|
"loss": 3.0861,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 2.34,
|
|
"learning_rate": 2.8718236953975652e-05,
|
|
"loss": 3.0714,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.8683661199388064e-05,
|
|
"loss": 3.063,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.864907824091984e-05,
|
|
"loss": 3.1496,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.8614488146202466e-05,
|
|
"loss": 3.0788,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 2.35,
|
|
"learning_rate": 2.8579890982881396e-05,
|
|
"loss": 3.0164,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.8545286818615897e-05,
|
|
"loss": 3.1023,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.8510675721078937e-05,
|
|
"loss": 3.0453,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.847605775795704e-05,
|
|
"loss": 3.1209,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.844143299695015e-05,
|
|
"loss": 3.0609,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 2.36,
|
|
"learning_rate": 2.8406801505771514e-05,
|
|
"loss": 3.0713,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.837216335214753e-05,
|
|
"loss": 3.0678,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.8337518603817635e-05,
|
|
"loss": 3.0978,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.8302867328534166e-05,
|
|
"loss": 3.0728,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.826820959406221e-05,
|
|
"loss": 3.0968,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 2.37,
|
|
"learning_rate": 2.8233545468179494e-05,
|
|
"loss": 3.0653,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.8198875018676247e-05,
|
|
"loss": 3.0835,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.816419831335506e-05,
|
|
"loss": 3.0784,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.812951542003076e-05,
|
|
"loss": 3.0441,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 2.38,
|
|
"learning_rate": 2.8094826406530277e-05,
|
|
"loss": 3.079,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.8060131340692515e-05,
|
|
"loss": 3.1258,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.8025430290368186e-05,
|
|
"loss": 3.0687,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.799072332341975e-05,
|
|
"loss": 3.0717,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.7956010507721193e-05,
|
|
"loss": 3.0986,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 2.39,
|
|
"learning_rate": 2.7921291911157975e-05,
|
|
"loss": 3.1202,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.788656760162685e-05,
|
|
"loss": 3.0424,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.7851837647035727e-05,
|
|
"loss": 3.0636,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.7817102115303577e-05,
|
|
"loss": 3.0761,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 2.4,
|
|
"learning_rate": 2.778236107436027e-05,
|
|
"loss": 3.0874,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.774761459214645e-05,
|
|
"loss": 3.0725,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.7712862736613393e-05,
|
|
"loss": 3.095,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.7678105575722903e-05,
|
|
"loss": 3.1073,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.764334317744714e-05,
|
|
"loss": 3.1232,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 2.41,
|
|
"learning_rate": 2.7608575609768516e-05,
|
|
"loss": 3.1249,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.7573802940679554e-05,
|
|
"loss": 3.1031,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.7539025238182755e-05,
|
|
"loss": 3.1128,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.750424257029044e-05,
|
|
"loss": 3.086,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.746945500502468e-05,
|
|
"loss": 3.0999,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 2.42,
|
|
"learning_rate": 2.74346626104171e-05,
|
|
"loss": 3.1157,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 2.739986545450876e-05,
|
|
"loss": 3.0799,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 2.7365063605350055e-05,
|
|
"loss": 3.1216,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 2.733025713100054e-05,
|
|
"loss": 3.102,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 2.43,
|
|
"learning_rate": 2.7295446099528833e-05,
|
|
"loss": 3.1082,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 2.7260630579012437e-05,
|
|
"loss": 3.0696,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 2.7225810637537657e-05,
|
|
"loss": 3.1085,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 2.7190986343199444e-05,
|
|
"loss": 3.0841,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 2.7156157764101237e-05,
|
|
"loss": 3.0917,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 2.44,
|
|
"learning_rate": 2.7121324968354896e-05,
|
|
"loss": 3.0829,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 2.7086488024080482e-05,
|
|
"loss": 3.0723,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 2.7051646999406198e-05,
|
|
"loss": 3.0931,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 2.7016801962468218e-05,
|
|
"loss": 3.0761,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 2.698195298141057e-05,
|
|
"loss": 3.1442,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 2.45,
|
|
"learning_rate": 2.6947100124384977e-05,
|
|
"loss": 3.0613,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 2.6912243459550763e-05,
|
|
"loss": 3.121,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 2.6877383055074683e-05,
|
|
"loss": 3.1307,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 2.6842518979130814e-05,
|
|
"loss": 3.1062,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 2.46,
|
|
"learning_rate": 2.680765129990041e-05,
|
|
"loss": 3.1192,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 2.677278008557177e-05,
|
|
"loss": 3.125,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 2.673790540434011e-05,
|
|
"loss": 3.1473,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 2.6703027324407427e-05,
|
|
"loss": 3.0819,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 2.6668145913982356e-05,
|
|
"loss": 3.0896,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 2.47,
|
|
"learning_rate": 2.663326124128006e-05,
|
|
"loss": 3.1016,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 2.6598373374522067e-05,
|
|
"loss": 3.1391,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 2.656348238193616e-05,
|
|
"loss": 3.1086,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 2.652858833175623e-05,
|
|
"loss": 3.1175,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 2.6493691292222154e-05,
|
|
"loss": 3.0815,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 2.48,
|
|
"learning_rate": 2.6458791331579653e-05,
|
|
"loss": 3.1265,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 2.6423888518080143e-05,
|
|
"loss": 3.116,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 2.6388982919980653e-05,
|
|
"loss": 3.0215,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 2.635407460554363e-05,
|
|
"loss": 3.0756,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 2.631916364303685e-05,
|
|
"loss": 3.0693,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 2.6284250100733253e-05,
|
|
"loss": 3.0939,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 2.624933404691083e-05,
|
|
"loss": 3.0734,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 2.6214415549852493e-05,
|
|
"loss": 3.0926,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 2.617949467784592e-05,
|
|
"loss": 3.1195,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"learning_rate": 2.614457149918344e-05,
|
|
"loss": 3.051,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 2.6109646082161888e-05,
|
|
"loss": 3.1115,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 2.6074718495082472e-05,
|
|
"loss": 3.0824,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 2.6039788806250664e-05,
|
|
"loss": 3.0901,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 2.51,
|
|
"learning_rate": 2.600485708397603e-05,
|
|
"loss": 3.0843,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 2.596992339657211e-05,
|
|
"loss": 3.0616,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 2.59349878123563e-05,
|
|
"loss": 3.1046,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 2.590005039964969e-05,
|
|
"loss": 3.0518,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 2.5865111226776955e-05,
|
|
"loss": 3.1193,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 2.52,
|
|
"learning_rate": 2.583017036206622e-05,
|
|
"loss": 3.1094,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 2.57952278738489e-05,
|
|
"loss": 3.0654,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 2.5760283830459604e-05,
|
|
"loss": 3.1159,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 2.5725338300235964e-05,
|
|
"loss": 3.0928,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 2.5690391351518527e-05,
|
|
"loss": 3.0995,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 2.53,
|
|
"learning_rate": 2.5655443052650636e-05,
|
|
"loss": 3.0705,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 2.5620493471978234e-05,
|
|
"loss": 3.0478,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 2.55855426778498e-05,
|
|
"loss": 3.1046,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 2.5550590738616177e-05,
|
|
"loss": 3.0884,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 2.54,
|
|
"learning_rate": 2.5515637722630443e-05,
|
|
"loss": 3.0307,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 2.5480683698247793e-05,
|
|
"loss": 3.0585,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 2.5445728733825374e-05,
|
|
"loss": 3.0698,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 2.54107728977222e-05,
|
|
"loss": 3.1397,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 2.5375816258298973e-05,
|
|
"loss": 3.0161,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 2.55,
|
|
"learning_rate": 2.534085888391796e-05,
|
|
"loss": 3.0402,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 2.530590084294287e-05,
|
|
"loss": 3.0921,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 2.5270942203738736e-05,
|
|
"loss": 3.1017,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 2.523598303467173e-05,
|
|
"loss": 3.1229,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 2.520102340410907e-05,
|
|
"loss": 3.1025,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 2.56,
|
|
"learning_rate": 2.5166063380418887e-05,
|
|
"loss": 3.0991,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 2.513110303197008e-05,
|
|
"loss": 3.0159,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 2.509614242713216e-05,
|
|
"loss": 3.0764,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 2.5061181634275165e-05,
|
|
"loss": 3.0662,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 2.57,
|
|
"learning_rate": 2.50262207217695e-05,
|
|
"loss": 3.0829,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 2.4991259757985783e-05,
|
|
"loss": 3.1125,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 2.4956298811294755e-05,
|
|
"loss": 3.0865,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 2.4921337950067105e-05,
|
|
"loss": 3.0786,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 2.4886377242673374e-05,
|
|
"loss": 3.0926,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 2.58,
|
|
"learning_rate": 2.485141675748378e-05,
|
|
"loss": 3.0696,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 2.481645656286812e-05,
|
|
"loss": 3.123,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 2.4781496727195633e-05,
|
|
"loss": 3.1018,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 2.474653731883484e-05,
|
|
"loss": 3.0686,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 2.4711578406153425e-05,
|
|
"loss": 3.0781,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 2.59,
|
|
"learning_rate": 2.4676620057518113e-05,
|
|
"loss": 3.1127,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 2.4641662341294515e-05,
|
|
"loss": 3.0634,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 2.460670532584702e-05,
|
|
"loss": 3.0725,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 2.4571749079538628e-05,
|
|
"loss": 3.1118,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 2.6,
|
|
"learning_rate": 2.453679367073085e-05,
|
|
"loss": 3.13,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 2.4501839167783552e-05,
|
|
"loss": 3.0759,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 2.4466885639054836e-05,
|
|
"loss": 3.0375,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 2.4431933152900885e-05,
|
|
"loss": 3.048,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 2.439698177767586e-05,
|
|
"loss": 3.125,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 2.61,
|
|
"learning_rate": 2.436203158173173e-05,
|
|
"loss": 3.0834,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 2.4327082633418177e-05,
|
|
"loss": 3.0619,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 2.4292135001082433e-05,
|
|
"loss": 3.0853,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 2.4257188753069156e-05,
|
|
"loss": 3.0798,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 2.62,
|
|
"learning_rate": 2.4222243957720293e-05,
|
|
"loss": 3.0659,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 2.418730068337497e-05,
|
|
"loss": 3.0525,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 2.4152358998369313e-05,
|
|
"loss": 3.1012,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 2.4117418971036355e-05,
|
|
"loss": 3.0754,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 2.4082480669705872e-05,
|
|
"loss": 3.0462,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 2.63,
|
|
"learning_rate": 2.4047544162704296e-05,
|
|
"loss": 3.0427,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 2.4012609518354517e-05,
|
|
"loss": 3.0675,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 2.3977676804975803e-05,
|
|
"loss": 3.1119,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 2.3942746090883633e-05,
|
|
"loss": 3.0648,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 2.3907817444389583e-05,
|
|
"loss": 3.0725,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 2.64,
|
|
"learning_rate": 2.3872890933801182e-05,
|
|
"loss": 3.0316,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 2.3837966627421785e-05,
|
|
"loss": 3.0828,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 2.380304459355043e-05,
|
|
"loss": 3.0851,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 2.376812490048172e-05,
|
|
"loss": 3.0935,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 2.65,
|
|
"learning_rate": 2.3733207616505678e-05,
|
|
"loss": 3.0782,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 2.3698292809907606e-05,
|
|
"loss": 3.0609,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 2.3663380548967985e-05,
|
|
"loss": 3.0507,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 2.3628470901962282e-05,
|
|
"loss": 3.1248,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 2.3593563937160888e-05,
|
|
"loss": 3.0939,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 2.3558659722828918e-05,
|
|
"loss": 3.0961,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 2.3523758327226133e-05,
|
|
"loss": 3.0794,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 2.3488859818606768e-05,
|
|
"loss": 3.0917,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 2.3453964265219413e-05,
|
|
"loss": 3.0319,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 2.3419071735306884e-05,
|
|
"loss": 3.124,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 2.67,
|
|
"learning_rate": 2.3384182297106062e-05,
|
|
"loss": 3.1025,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 2.3349296018847834e-05,
|
|
"loss": 3.0926,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 2.3314412968756855e-05,
|
|
"loss": 3.0543,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 2.327953321505149e-05,
|
|
"loss": 3.0743,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 2.68,
|
|
"learning_rate": 2.3244656825943654e-05,
|
|
"loss": 3.0518,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 2.3209783869638677e-05,
|
|
"loss": 3.0705,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 2.317491441433518e-05,
|
|
"loss": 3.0462,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 2.3140048528224945e-05,
|
|
"loss": 3.0676,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 2.3105186279492757e-05,
|
|
"loss": 3.0791,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 2.69,
|
|
"learning_rate": 2.3070327736316304e-05,
|
|
"loss": 3.1268,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 2.303547296686602e-05,
|
|
"loss": 3.0877,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 2.3000622039304956e-05,
|
|
"loss": 3.0766,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 2.2965775021788653e-05,
|
|
"loss": 3.132,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 2.7,
|
|
"learning_rate": 2.2930931982465004e-05,
|
|
"loss": 3.0841,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 2.2896092989474132e-05,
|
|
"loss": 3.0925,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 2.2861258110948237e-05,
|
|
"loss": 3.0371,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 2.2826427415011466e-05,
|
|
"loss": 3.0535,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 2.2791600969779796e-05,
|
|
"loss": 3.0623,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 2.71,
|
|
"learning_rate": 2.27567788433609e-05,
|
|
"loss": 3.0631,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 2.2721961103853985e-05,
|
|
"loss": 3.0896,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 2.2687147819349688e-05,
|
|
"loss": 3.1099,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 2.265233905792993e-05,
|
|
"loss": 3.0572,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 2.2617534887667806e-05,
|
|
"loss": 3.1104,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 2.72,
|
|
"learning_rate": 2.258273537662741e-05,
|
|
"loss": 3.0959,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 2.2547940592863728e-05,
|
|
"loss": 3.1369,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 2.251315060442251e-05,
|
|
"loss": 3.0734,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 2.2478365479340118e-05,
|
|
"loss": 3.1181,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 2.73,
|
|
"learning_rate": 2.2443585285643412e-05,
|
|
"loss": 3.1178,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 2.24088100913496e-05,
|
|
"loss": 3.0586,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 2.2374039964466113e-05,
|
|
"loss": 3.1085,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 2.2339274972990487e-05,
|
|
"loss": 3.0578,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 2.2304515184910197e-05,
|
|
"loss": 3.0263,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 2.74,
|
|
"learning_rate": 2.226976066820255e-05,
|
|
"loss": 3.0977,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 2.223501149083454e-05,
|
|
"loss": 3.0817,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 2.2200267720762728e-05,
|
|
"loss": 3.0511,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 2.216552942593309e-05,
|
|
"loss": 3.0776,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 2.2130796674280893e-05,
|
|
"loss": 3.0923,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"learning_rate": 2.2096069533730587e-05,
|
|
"loss": 3.0393,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 2.206134807219562e-05,
|
|
"loss": 3.0755,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 2.2026632357578348e-05,
|
|
"loss": 3.1093,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 2.1991922457769885e-05,
|
|
"loss": 3.0742,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 2.76,
|
|
"learning_rate": 2.1957218440649964e-05,
|
|
"loss": 3.0783,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 2.192252037408684e-05,
|
|
"loss": 3.0407,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 2.188782832593711e-05,
|
|
"loss": 3.1184,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 2.18531423640456e-05,
|
|
"loss": 3.1023,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 2.1818462556245246e-05,
|
|
"loss": 3.0721,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 2.77,
|
|
"learning_rate": 2.178378897035694e-05,
|
|
"loss": 3.1031,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.1749121674189405e-05,
|
|
"loss": 3.0602,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.171446073553907e-05,
|
|
"loss": 3.0958,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.1679806222189924e-05,
|
|
"loss": 3.0477,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.1645158201913402e-05,
|
|
"loss": 3.0522,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 2.78,
|
|
"learning_rate": 2.1610516742468227e-05,
|
|
"loss": 3.0632,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.15758819116003e-05,
|
|
"loss": 3.0794,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.1541253777042552e-05,
|
|
"loss": 3.0774,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.150663240651483e-05,
|
|
"loss": 3.1031,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 2.79,
|
|
"learning_rate": 2.1472017867723747e-05,
|
|
"loss": 3.0277,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.143741022836255e-05,
|
|
"loss": 3.0626,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.1402809556110997e-05,
|
|
"loss": 3.049,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.136821591863522e-05,
|
|
"loss": 3.0808,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.1333629383587613e-05,
|
|
"loss": 3.069,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 2.8,
|
|
"learning_rate": 2.1299050018606648e-05,
|
|
"loss": 3.0232,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1264477891316792e-05,
|
|
"loss": 3.0622,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1229913069328353e-05,
|
|
"loss": 3.0584,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1195355620237366e-05,
|
|
"loss": 3.0836,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 2.81,
|
|
"learning_rate": 2.1160805611625425e-05,
|
|
"loss": 3.0443,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.1126263111059586e-05,
|
|
"loss": 3.0655,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.109172818609223e-05,
|
|
"loss": 3.0734,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.105720090426091e-05,
|
|
"loss": 3.114,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.102268133308823e-05,
|
|
"loss": 3.0608,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 2.0988169540081728e-05,
|
|
"loss": 3.0571,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 2.095366559273372e-05,
|
|
"loss": 3.0478,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 2.091916955852118e-05,
|
|
"loss": 3.0968,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 2.0884681504905608e-05,
|
|
"loss": 3.0641,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 2.0850201499332904e-05,
|
|
"loss": 3.0924,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 2.83,
|
|
"learning_rate": 2.0815729609233215e-05,
|
|
"loss": 3.1327,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 2.078126590202083e-05,
|
|
"loss": 3.0996,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 2.0746810445094028e-05,
|
|
"loss": 3.117,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 2.0712363305834955e-05,
|
|
"loss": 3.0755,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 2.84,
|
|
"learning_rate": 2.0677924551609495e-05,
|
|
"loss": 3.083,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 2.0643494249767126e-05,
|
|
"loss": 3.0577,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 2.0609072467640804e-05,
|
|
"loss": 3.1007,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 2.0574659272546812e-05,
|
|
"loss": 2.9806,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 2.0540254731784656e-05,
|
|
"loss": 3.086,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 2.85,
|
|
"learning_rate": 2.050585891263691e-05,
|
|
"loss": 3.0449,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 2.047147188236909e-05,
|
|
"loss": 3.0903,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 2.0437093708229528e-05,
|
|
"loss": 3.0774,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 2.040272445744923e-05,
|
|
"loss": 3.0449,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 2.0368364197241753e-05,
|
|
"loss": 3.0955,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 2.86,
|
|
"learning_rate": 2.0334012994803074e-05,
|
|
"loss": 3.0442,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 2.0299670917311463e-05,
|
|
"loss": 3.058,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 2.0265338031927336e-05,
|
|
"loss": 3.0525,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 2.0231014405793134e-05,
|
|
"loss": 3.0448,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 2.87,
|
|
"learning_rate": 2.019670010603319e-05,
|
|
"loss": 3.0906,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 2.0162395199753596e-05,
|
|
"loss": 3.0079,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 2.0128099754042088e-05,
|
|
"loss": 3.0783,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 2.0093813835967878e-05,
|
|
"loss": 3.0897,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 2.0059537512581566e-05,
|
|
"loss": 3.0362,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 2.88,
|
|
"learning_rate": 2.0025270850914975e-05,
|
|
"loss": 3.0994,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 1.9991013917981038e-05,
|
|
"loss": 3.0839,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 1.9956766780773665e-05,
|
|
"loss": 3.0551,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 1.9922529506267602e-05,
|
|
"loss": 3.0812,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 2.89,
|
|
"learning_rate": 1.9888302161418313e-05,
|
|
"loss": 3.1256,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 1.985408481316184e-05,
|
|
"loss": 3.1231,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 1.981987752841467e-05,
|
|
"loss": 3.066,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 1.978568037407364e-05,
|
|
"loss": 3.0784,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 1.9751493417015736e-05,
|
|
"loss": 3.0503,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 2.9,
|
|
"learning_rate": 1.9717316724098016e-05,
|
|
"loss": 3.0933,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 1.9683150362157476e-05,
|
|
"loss": 3.0458,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 1.9648994398010893e-05,
|
|
"loss": 3.0642,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 1.9614848898454717e-05,
|
|
"loss": 3.1345,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 1.958071393026493e-05,
|
|
"loss": 3.0581,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"learning_rate": 1.9546589560196925e-05,
|
|
"loss": 3.0417,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 1.951247585498537e-05,
|
|
"loss": 3.0763,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 1.9478372881344063e-05,
|
|
"loss": 3.0669,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 1.944428070596583e-05,
|
|
"loss": 2.9967,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 2.92,
|
|
"learning_rate": 1.9410199395522367e-05,
|
|
"loss": 3.075,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 1.9376129016664128e-05,
|
|
"loss": 3.0736,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 1.93420696360202e-05,
|
|
"loss": 3.0856,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 1.9308021320198135e-05,
|
|
"loss": 3.1077,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 1.9273984135783872e-05,
|
|
"loss": 3.0321,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 2.93,
|
|
"learning_rate": 1.9239958149341572e-05,
|
|
"loss": 3.1347,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9205943427413492e-05,
|
|
"loss": 3.0587,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9171940036519864e-05,
|
|
"loss": 3.0254,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.913794804315876e-05,
|
|
"loss": 3.1113,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9103967513805956e-05,
|
|
"loss": 3.1004,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 2.94,
|
|
"learning_rate": 1.9069998514914832e-05,
|
|
"loss": 3.0155,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.9036041112916198e-05,
|
|
"loss": 3.1082,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.9002095374218186e-05,
|
|
"loss": 3.0399,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.8968161365206115e-05,
|
|
"loss": 3.1204,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 2.95,
|
|
"learning_rate": 1.8934239152242384e-05,
|
|
"loss": 3.1119,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.8900328801666306e-05,
|
|
"loss": 3.1011,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.8866430379794e-05,
|
|
"loss": 3.0728,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.8832543952918256e-05,
|
|
"loss": 3.1189,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.8798669587308416e-05,
|
|
"loss": 3.0881,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 2.96,
|
|
"learning_rate": 1.8764807349210213e-05,
|
|
"loss": 3.0758,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 1.873095730484569e-05,
|
|
"loss": 3.1332,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 1.869711952041303e-05,
|
|
"loss": 3.0424,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 1.8663294062086432e-05,
|
|
"loss": 3.0782,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 1.8629480996016e-05,
|
|
"loss": 3.017,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 2.97,
|
|
"learning_rate": 1.859568038832761e-05,
|
|
"loss": 3.0482,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.856189230512276e-05,
|
|
"loss": 3.0514,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.852811681247845e-05,
|
|
"loss": 3.1268,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.849435397644708e-05,
|
|
"loss": 3.0204,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 2.98,
|
|
"learning_rate": 1.8460603863056285e-05,
|
|
"loss": 3.0401,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.8426866538308803e-05,
|
|
"loss": 3.0728,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.839314206818241e-05,
|
|
"loss": 3.0712,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.8359430518629696e-05,
|
|
"loss": 3.02,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.8325731955577995e-05,
|
|
"loss": 3.0477,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.8292046444929256e-05,
|
|
"loss": 3.0242,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.8258374052559895e-05,
|
|
"loss": 3.1099,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.8224714844320673e-05,
|
|
"loss": 3.0553,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_loss": 3.0703344345092773,
|
|
"eval_runtime": 191.332,
|
|
"eval_samples_per_second": 774.674,
|
|
"eval_steps_per_second": 24.209,
|
|
"step": 13896
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.819106888603656e-05,
|
|
"loss": 3.0542,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"learning_rate": 1.8157436243506636e-05,
|
|
"loss": 3.0391,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 3.01,
|
|
"learning_rate": 1.812381698250392e-05,
|
|
"loss": 3.0663,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 3.01,
|
|
"learning_rate": 1.8090211168775264e-05,
|
|
"loss": 3.0202,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 3.01,
|
|
"learning_rate": 1.8056618868041233e-05,
|
|
"loss": 3.0616,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 3.01,
|
|
"learning_rate": 1.802304014599595e-05,
|
|
"loss": 3.0781,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 3.01,
|
|
"learning_rate": 1.7989475068307003e-05,
|
|
"loss": 3.0343,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 3.02,
|
|
"learning_rate": 1.7955923700615284e-05,
|
|
"loss": 3.0459,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 3.02,
|
|
"learning_rate": 1.7922386108534873e-05,
|
|
"loss": 3.0434,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 3.02,
|
|
"learning_rate": 1.788886235765291e-05,
|
|
"loss": 3.0426,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 3.02,
|
|
"learning_rate": 1.7855352513529466e-05,
|
|
"loss": 3.0538,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 3.02,
|
|
"learning_rate": 1.7821856641697425e-05,
|
|
"loss": 3.0079,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 3.03,
|
|
"learning_rate": 1.778837480766234e-05,
|
|
"loss": 3.0746,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 3.03,
|
|
"learning_rate": 1.7754907076902305e-05,
|
|
"loss": 3.0435,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 3.03,
|
|
"learning_rate": 1.772145351486783e-05,
|
|
"loss": 3.0989,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 3.03,
|
|
"learning_rate": 1.768801418698175e-05,
|
|
"loss": 3.0364,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"learning_rate": 1.7654589158639024e-05,
|
|
"loss": 3.0943,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"learning_rate": 1.7621178495206665e-05,
|
|
"loss": 3.0778,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"learning_rate": 1.7587782262023583e-05,
|
|
"loss": 3.1174,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"learning_rate": 1.7554400524400482e-05,
|
|
"loss": 3.0757,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 3.04,
|
|
"learning_rate": 1.7521033347619707e-05,
|
|
"loss": 3.0477,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 1.748768079693513e-05,
|
|
"loss": 3.0232,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 1.7454342937572016e-05,
|
|
"loss": 3.0792,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 1.7421019834726914e-05,
|
|
"loss": 3.0128,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 1.7387711553567496e-05,
|
|
"loss": 3.0611,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 3.05,
|
|
"learning_rate": 1.735441815923246e-05,
|
|
"loss": 3.0558,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 3.06,
|
|
"learning_rate": 1.7321139716831385e-05,
|
|
"loss": 3.094,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 3.06,
|
|
"learning_rate": 1.7287876291444615e-05,
|
|
"loss": 3.1006,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 3.06,
|
|
"learning_rate": 1.725462794812312e-05,
|
|
"loss": 3.0843,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 3.06,
|
|
"learning_rate": 1.722139475188838e-05,
|
|
"loss": 3.0466,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 3.07,
|
|
"learning_rate": 1.7188176767732252e-05,
|
|
"loss": 3.0243,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 3.07,
|
|
"learning_rate": 1.7154974060616845e-05,
|
|
"loss": 3.0615,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 3.07,
|
|
"learning_rate": 1.7121786695474383e-05,
|
|
"loss": 3.0615,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 3.07,
|
|
"learning_rate": 1.7088614737207105e-05,
|
|
"loss": 3.0557,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 3.07,
|
|
"learning_rate": 1.705545825068709e-05,
|
|
"loss": 3.0453,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"learning_rate": 1.702231730075619e-05,
|
|
"loss": 3.0437,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"learning_rate": 1.6989191952225863e-05,
|
|
"loss": 3.084,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"learning_rate": 1.6956082269877056e-05,
|
|
"loss": 3.0663,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 3.08,
|
|
"learning_rate": 1.6922988318460076e-05,
|
|
"loss": 3.0339,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 1.6889910162694463e-05,
|
|
"loss": 3.06,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 1.6856847867268876e-05,
|
|
"loss": 3.0486,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 1.682380149684095e-05,
|
|
"loss": 3.0859,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 1.679077111603718e-05,
|
|
"loss": 3.0324,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 3.09,
|
|
"learning_rate": 1.675775678945279e-05,
|
|
"loss": 3.078,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 1.6724758581651607e-05,
|
|
"loss": 3.0725,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 1.6691776557165932e-05,
|
|
"loss": 3.0497,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 1.6658810780496437e-05,
|
|
"loss": 3.0521,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 1.662586131611199e-05,
|
|
"loss": 3.0912,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 3.1,
|
|
"learning_rate": 1.6592928228449578e-05,
|
|
"loss": 3.0547,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 3.11,
|
|
"learning_rate": 1.6560011581914153e-05,
|
|
"loss": 3.022,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 3.11,
|
|
"learning_rate": 1.6527111440878518e-05,
|
|
"loss": 3.093,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 3.11,
|
|
"learning_rate": 1.6494227869683194e-05,
|
|
"loss": 3.0573,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 3.11,
|
|
"learning_rate": 1.6461360932636308e-05,
|
|
"loss": 3.0709,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"learning_rate": 1.6428510694013444e-05,
|
|
"loss": 3.0223,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"learning_rate": 1.6395677218057533e-05,
|
|
"loss": 3.0011,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"learning_rate": 1.6362860568978715e-05,
|
|
"loss": 3.0707,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"learning_rate": 1.633006081095426e-05,
|
|
"loss": 3.0621,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 3.12,
|
|
"learning_rate": 1.6297278008128362e-05,
|
|
"loss": 3.0078,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 1.626451222461207e-05,
|
|
"loss": 3.0806,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 1.6231763524483165e-05,
|
|
"loss": 3.0782,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 1.6199031971786006e-05,
|
|
"loss": 3.0366,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 1.6166317630531412e-05,
|
|
"loss": 3.0531,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 3.13,
|
|
"learning_rate": 1.613362056469656e-05,
|
|
"loss": 3.0323,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"learning_rate": 1.6100940838224828e-05,
|
|
"loss": 3.0446,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"learning_rate": 1.6068278515025688e-05,
|
|
"loss": 3.061,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"learning_rate": 1.6035633658974584e-05,
|
|
"loss": 3.0469,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 3.14,
|
|
"learning_rate": 1.600300633391279e-05,
|
|
"loss": 3.0177,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.5970396603647308e-05,
|
|
"loss": 3.0747,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.5937804531950724e-05,
|
|
"loss": 3.0483,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.590523018256109e-05,
|
|
"loss": 3.0961,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.58726736191818e-05,
|
|
"loss": 3.0801,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.5840134905481467e-05,
|
|
"loss": 3.0833,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"learning_rate": 1.58076141050938e-05,
|
|
"loss": 3.0564,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"learning_rate": 1.5775111281617463e-05,
|
|
"loss": 3.0564,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"learning_rate": 1.5742626498615975e-05,
|
|
"loss": 3.0466,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"learning_rate": 1.5710159819617576e-05,
|
|
"loss": 3.0252,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 3.16,
|
|
"learning_rate": 1.5677711308115106e-05,
|
|
"loss": 3.0641,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 3.17,
|
|
"learning_rate": 1.5645281027565856e-05,
|
|
"loss": 3.0575,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 3.17,
|
|
"learning_rate": 1.5612869041391477e-05,
|
|
"loss": 3.0546,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 3.17,
|
|
"learning_rate": 1.5580475412977845e-05,
|
|
"loss": 3.0695,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 3.17,
|
|
"learning_rate": 1.5548100205674932e-05,
|
|
"loss": 3.0535,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 1.5515743482796673e-05,
|
|
"loss": 3.0548,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 1.5483405307620884e-05,
|
|
"loss": 3.0137,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 1.5451085743389082e-05,
|
|
"loss": 3.0788,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 1.5418784853306397e-05,
|
|
"loss": 3.0569,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 3.18,
|
|
"learning_rate": 1.538650270054144e-05,
|
|
"loss": 3.0883,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 3.19,
|
|
"learning_rate": 1.5354239348226174e-05,
|
|
"loss": 3.0535,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 3.19,
|
|
"learning_rate": 1.53219948594558e-05,
|
|
"loss": 3.0504,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 3.19,
|
|
"learning_rate": 1.528976929728863e-05,
|
|
"loss": 3.0356,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 3.19,
|
|
"learning_rate": 1.5257562724745957e-05,
|
|
"loss": 3.0523,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 1.5225375204811943e-05,
|
|
"loss": 3.0525,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 1.5193206800433487e-05,
|
|
"loss": 3.075,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 1.5161057574520104e-05,
|
|
"loss": 3.0255,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 1.5128927589943808e-05,
|
|
"loss": 3.0516,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 3.2,
|
|
"learning_rate": 1.5096816909538974e-05,
|
|
"loss": 3.0168,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 3.21,
|
|
"learning_rate": 1.5064725596102242e-05,
|
|
"loss": 3.047,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 3.21,
|
|
"learning_rate": 1.5032653712392346e-05,
|
|
"loss": 3.0618,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 3.21,
|
|
"learning_rate": 1.5000601321130076e-05,
|
|
"loss": 3.0317,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 3.21,
|
|
"learning_rate": 1.4968568484998047e-05,
|
|
"loss": 3.0799,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 3.21,
|
|
"learning_rate": 1.4936555266640665e-05,
|
|
"loss": 3.0745,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 3.22,
|
|
"learning_rate": 1.4904561728663952e-05,
|
|
"loss": 3.0677,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 3.22,
|
|
"learning_rate": 1.4872587933635458e-05,
|
|
"loss": 3.0482,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 3.22,
|
|
"learning_rate": 1.4840633944084109e-05,
|
|
"loss": 3.0413,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 3.22,
|
|
"learning_rate": 1.4808699822500105e-05,
|
|
"loss": 3.0743,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 1.4776785631334799e-05,
|
|
"loss": 3.0688,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 1.4744891433000558e-05,
|
|
"loss": 3.0695,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 1.4713017289870647e-05,
|
|
"loss": 3.0509,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 1.4681163264279124e-05,
|
|
"loss": 3.0831,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 3.23,
|
|
"learning_rate": 1.4649329418520697e-05,
|
|
"loss": 3.0686,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"learning_rate": 1.4617515814850603e-05,
|
|
"loss": 3.0389,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"learning_rate": 1.45857225154845e-05,
|
|
"loss": 3.0766,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"learning_rate": 1.4553949582598345e-05,
|
|
"loss": 3.0647,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"learning_rate": 1.4522197078328253e-05,
|
|
"loss": 3.0675,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 3.24,
|
|
"learning_rate": 1.4490465064770392e-05,
|
|
"loss": 3.0314,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"learning_rate": 1.4458753603980866e-05,
|
|
"loss": 3.0655,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"learning_rate": 1.4427062757975573e-05,
|
|
"loss": 3.0644,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"learning_rate": 1.4395392588730095e-05,
|
|
"loss": 3.0548,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 3.25,
|
|
"learning_rate": 1.4363743158179598e-05,
|
|
"loss": 3.0627,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 3.26,
|
|
"learning_rate": 1.433211452821868e-05,
|
|
"loss": 3.078,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 3.26,
|
|
"learning_rate": 1.4300506760701248e-05,
|
|
"loss": 3.0849,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 3.26,
|
|
"learning_rate": 1.4268919917440423e-05,
|
|
"loss": 3.0851,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 3.26,
|
|
"learning_rate": 1.4237354060208402e-05,
|
|
"loss": 2.9985,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 3.26,
|
|
"learning_rate": 1.4205809250736347e-05,
|
|
"loss": 3.0449,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 3.27,
|
|
"learning_rate": 1.4174285550714247e-05,
|
|
"loss": 3.0482,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 3.27,
|
|
"learning_rate": 1.4142783021790817e-05,
|
|
"loss": 3.0242,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 3.27,
|
|
"learning_rate": 1.4111301725573367e-05,
|
|
"loss": 3.0998,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 3.27,
|
|
"learning_rate": 1.4079841723627688e-05,
|
|
"loss": 3.0741,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 1.4048403077477918e-05,
|
|
"loss": 3.052,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 1.4016985848606435e-05,
|
|
"loss": 3.0273,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 1.3985590098453738e-05,
|
|
"loss": 3.0439,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 1.3954215888418318e-05,
|
|
"loss": 3.0551,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 3.28,
|
|
"learning_rate": 1.3922863279856535e-05,
|
|
"loss": 3.1111,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 3.29,
|
|
"learning_rate": 1.3891532334082518e-05,
|
|
"loss": 3.0981,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 3.29,
|
|
"learning_rate": 1.386022311236802e-05,
|
|
"loss": 3.0954,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 3.29,
|
|
"learning_rate": 1.38289356759423e-05,
|
|
"loss": 3.0783,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 3.29,
|
|
"learning_rate": 1.3797670085992053e-05,
|
|
"loss": 3.0643,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 3.29,
|
|
"learning_rate": 1.3766426403661215e-05,
|
|
"loss": 3.0291,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 3.3,
|
|
"learning_rate": 1.3735204690050879e-05,
|
|
"loss": 3.0613,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 3.3,
|
|
"learning_rate": 1.3704005006219189e-05,
|
|
"loss": 3.0073,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 3.3,
|
|
"learning_rate": 1.3672827413181207e-05,
|
|
"loss": 3.0438,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 3.3,
|
|
"learning_rate": 1.3641671971908781e-05,
|
|
"loss": 3.05,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 1.3610538743330443e-05,
|
|
"loss": 3.0315,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 1.35794277883313e-05,
|
|
"loss": 3.0834,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 1.3548339167752888e-05,
|
|
"loss": 3.0867,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 1.3517272942393055e-05,
|
|
"loss": 3.0431,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 3.31,
|
|
"learning_rate": 1.348622917300587e-05,
|
|
"loss": 3.1059,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.3455207920301477e-05,
|
|
"loss": 3.0624,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.3424209244945984e-05,
|
|
"loss": 3.0538,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.3393233207561356e-05,
|
|
"loss": 3.0448,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.3362279868725278e-05,
|
|
"loss": 3.0791,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.3331349288971046e-05,
|
|
"loss": 3.0532,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 3.33,
|
|
"learning_rate": 1.3300441528787449e-05,
|
|
"loss": 3.062,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 3.33,
|
|
"learning_rate": 1.3269556648618648e-05,
|
|
"loss": 3.0507,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 3.33,
|
|
"learning_rate": 1.3238694708864063e-05,
|
|
"loss": 3.0964,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 3.33,
|
|
"learning_rate": 1.3207855769878247e-05,
|
|
"loss": 3.0408,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 3.34,
|
|
"learning_rate": 1.3177039891970777e-05,
|
|
"loss": 3.0065,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 3.34,
|
|
"learning_rate": 1.314624713540612e-05,
|
|
"loss": 3.0178,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 3.34,
|
|
"learning_rate": 1.3115477560403532e-05,
|
|
"loss": 3.0697,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 3.34,
|
|
"learning_rate": 1.3084731227136948e-05,
|
|
"loss": 3.089,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 3.34,
|
|
"learning_rate": 1.3054008195734834e-05,
|
|
"loss": 3.0363,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 1.3023308526280093e-05,
|
|
"loss": 3.0394,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 1.2992632278809933e-05,
|
|
"loss": 3.0578,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 1.2961979513315764e-05,
|
|
"loss": 3.0156,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 1.2931350289743077e-05,
|
|
"loss": 3.0096,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 3.35,
|
|
"learning_rate": 1.2900744667991316e-05,
|
|
"loss": 3.0001,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 3.36,
|
|
"learning_rate": 1.287016270791377e-05,
|
|
"loss": 3.0078,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 3.36,
|
|
"learning_rate": 1.2839604469317462e-05,
|
|
"loss": 3.0303,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 3.36,
|
|
"learning_rate": 1.2809070011963014e-05,
|
|
"loss": 3.0857,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 3.36,
|
|
"learning_rate": 1.2778559395564548e-05,
|
|
"loss": 3.0598,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 3.37,
|
|
"learning_rate": 1.2748072679789564e-05,
|
|
"loss": 3.0147,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 3.37,
|
|
"learning_rate": 1.2717609924258811e-05,
|
|
"loss": 3.0968,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 3.37,
|
|
"learning_rate": 1.2687171188546187e-05,
|
|
"loss": 3.015,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 3.37,
|
|
"learning_rate": 1.2656756532178615e-05,
|
|
"loss": 3.0335,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 3.37,
|
|
"learning_rate": 1.2626366014635932e-05,
|
|
"loss": 3.0618,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 3.38,
|
|
"learning_rate": 1.2595999695350766e-05,
|
|
"loss": 3.0203,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 3.38,
|
|
"learning_rate": 1.2565657633708416e-05,
|
|
"loss": 3.0543,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 3.38,
|
|
"learning_rate": 1.2535339889046749e-05,
|
|
"loss": 3.0578,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 3.38,
|
|
"learning_rate": 1.2505046520656073e-05,
|
|
"loss": 3.0704,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 3.39,
|
|
"learning_rate": 1.2474777587779018e-05,
|
|
"loss": 3.0626,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 3.39,
|
|
"learning_rate": 1.2444533149610457e-05,
|
|
"loss": 3.0391,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 3.39,
|
|
"learning_rate": 1.2414313265297329e-05,
|
|
"loss": 3.0568,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 3.39,
|
|
"learning_rate": 1.2384117993938566e-05,
|
|
"loss": 3.0478,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 3.39,
|
|
"learning_rate": 1.2353947394584961e-05,
|
|
"loss": 3.0834,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 1.2323801526239068e-05,
|
|
"loss": 3.0699,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 1.2293680447855067e-05,
|
|
"loss": 3.0144,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 1.2263584218338658e-05,
|
|
"loss": 3.027,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 1.2233512896546944e-05,
|
|
"loss": 3.0298,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"learning_rate": 1.2203466541288344e-05,
|
|
"loss": 3.0119,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 3.41,
|
|
"learning_rate": 1.2173445211322415e-05,
|
|
"loss": 3.0436,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 3.41,
|
|
"learning_rate": 1.2143448965359793e-05,
|
|
"loss": 3.0477,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 3.41,
|
|
"learning_rate": 1.2113477862062053e-05,
|
|
"loss": 3.0715,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 3.41,
|
|
"learning_rate": 1.2083531960041605e-05,
|
|
"loss": 3.0144,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 1.2053611317861568e-05,
|
|
"loss": 3.0531,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 1.202371599403567e-05,
|
|
"loss": 3.0482,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 1.1993846047028117e-05,
|
|
"loss": 3.0403,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 1.1964001535253496e-05,
|
|
"loss": 3.065,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 3.42,
|
|
"learning_rate": 1.193418251707665e-05,
|
|
"loss": 3.0052,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.1904389050812558e-05,
|
|
"loss": 3.0018,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.187462119472623e-05,
|
|
"loss": 3.0484,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.1844879007032613e-05,
|
|
"loss": 3.0394,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.1815162545896435e-05,
|
|
"loss": 3.0637,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 3.43,
|
|
"learning_rate": 1.178547186943211e-05,
|
|
"loss": 3.0781,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 3.44,
|
|
"learning_rate": 1.1755807035703643e-05,
|
|
"loss": 3.0903,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 3.44,
|
|
"learning_rate": 1.1726168102724484e-05,
|
|
"loss": 3.0391,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 3.44,
|
|
"learning_rate": 1.1696555128457437e-05,
|
|
"loss": 3.0797,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 3.44,
|
|
"learning_rate": 1.1666968170814549e-05,
|
|
"loss": 3.0513,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 1.1637407287656974e-05,
|
|
"loss": 3.0669,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 1.1607872536794883e-05,
|
|
"loss": 3.0438,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 1.1578363975987338e-05,
|
|
"loss": 3.086,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 1.1548881662942185e-05,
|
|
"loss": 3.0371,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 3.45,
|
|
"learning_rate": 1.1519425655315939e-05,
|
|
"loss": 3.0572,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 3.46,
|
|
"learning_rate": 1.1489996010713667e-05,
|
|
"loss": 3.0275,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 3.46,
|
|
"learning_rate": 1.1460592786688887e-05,
|
|
"loss": 3.0097,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 3.46,
|
|
"learning_rate": 1.1431216040743442e-05,
|
|
"loss": 3.0145,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 3.46,
|
|
"learning_rate": 1.1401865830327397e-05,
|
|
"loss": 3.053,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 3.47,
|
|
"learning_rate": 1.1372542212838919e-05,
|
|
"loss": 3.0638,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 3.47,
|
|
"learning_rate": 1.1343245245624176e-05,
|
|
"loss": 3.0592,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 3.47,
|
|
"learning_rate": 1.1313974985977216e-05,
|
|
"loss": 3.0128,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 3.47,
|
|
"learning_rate": 1.1284731491139849e-05,
|
|
"loss": 3.0444,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 3.47,
|
|
"learning_rate": 1.1255514818301543e-05,
|
|
"loss": 3.0062,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 1.1226325024599337e-05,
|
|
"loss": 3.0262,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 1.1197162167117677e-05,
|
|
"loss": 3.0214,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 1.1168026302888338e-05,
|
|
"loss": 3.0697,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 1.113891748889031e-05,
|
|
"loss": 3.0522,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 3.48,
|
|
"learning_rate": 1.110983578204968e-05,
|
|
"loss": 3.0469,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 3.49,
|
|
"learning_rate": 1.1080781239239522e-05,
|
|
"loss": 3.061,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 3.49,
|
|
"learning_rate": 1.1051753917279791e-05,
|
|
"loss": 3.0644,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 3.49,
|
|
"learning_rate": 1.102275387293721e-05,
|
|
"loss": 3.0725,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 3.49,
|
|
"learning_rate": 1.0993781162925152e-05,
|
|
"loss": 3.0252,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 1.0964835843903534e-05,
|
|
"loss": 3.0684,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 1.093591797247871e-05,
|
|
"loss": 3.0862,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 1.0907027605203355e-05,
|
|
"loss": 3.0325,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 1.0878164798576346e-05,
|
|
"loss": 3.1146,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"learning_rate": 1.0849329609042689e-05,
|
|
"loss": 3.0475,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 3.51,
|
|
"learning_rate": 1.0820522092993355e-05,
|
|
"loss": 3.0469,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 3.51,
|
|
"learning_rate": 1.0791742306765205e-05,
|
|
"loss": 3.0327,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 3.51,
|
|
"learning_rate": 1.0762990306640868e-05,
|
|
"loss": 3.0678,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 3.51,
|
|
"learning_rate": 1.0734266148848641e-05,
|
|
"loss": 3.0684,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 3.51,
|
|
"learning_rate": 1.0705569889562361e-05,
|
|
"loss": 3.025,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 3.52,
|
|
"learning_rate": 1.0676901584901306e-05,
|
|
"loss": 3.1052,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 3.52,
|
|
"learning_rate": 1.0648261290930106e-05,
|
|
"loss": 3.0832,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 3.52,
|
|
"learning_rate": 1.0619649063658588e-05,
|
|
"loss": 3.0804,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 3.52,
|
|
"learning_rate": 1.0591064959041702e-05,
|
|
"loss": 3.0272,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 1.0562509032979398e-05,
|
|
"loss": 3.0792,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 1.0533981341316518e-05,
|
|
"loss": 3.0582,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 1.050548193984269e-05,
|
|
"loss": 3.0292,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 1.0477010884292218e-05,
|
|
"loss": 3.0363,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 3.53,
|
|
"learning_rate": 1.0448568230343967e-05,
|
|
"loss": 3.0162,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 3.54,
|
|
"learning_rate": 1.042015403362126e-05,
|
|
"loss": 3.0717,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 3.54,
|
|
"learning_rate": 1.0391768349691774e-05,
|
|
"loss": 3.0834,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 3.54,
|
|
"learning_rate": 1.0363411234067424e-05,
|
|
"loss": 3.0954,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 3.54,
|
|
"learning_rate": 1.0335082742204249e-05,
|
|
"loss": 3.1055,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 3.54,
|
|
"learning_rate": 1.0306782929502318e-05,
|
|
"loss": 3.0769,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 3.55,
|
|
"learning_rate": 1.0278511851305608e-05,
|
|
"loss": 3.0373,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 3.55,
|
|
"learning_rate": 1.0250269562901907e-05,
|
|
"loss": 3.0572,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 3.55,
|
|
"learning_rate": 1.02220561195227e-05,
|
|
"loss": 2.9998,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 3.55,
|
|
"learning_rate": 1.0193871576343062e-05,
|
|
"loss": 3.0439,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"learning_rate": 1.0165715988481545e-05,
|
|
"loss": 3.0393,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"learning_rate": 1.0137589411000079e-05,
|
|
"loss": 3.0156,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"learning_rate": 1.0109491898903863e-05,
|
|
"loss": 3.0427,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"learning_rate": 1.008142350714124e-05,
|
|
"loss": 3.0648,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 3.56,
|
|
"learning_rate": 1.005338429060364e-05,
|
|
"loss": 3.0658,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 3.57,
|
|
"learning_rate": 1.0025374304125399e-05,
|
|
"loss": 3.0478,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 3.57,
|
|
"learning_rate": 9.997393602483705e-06,
|
|
"loss": 3.0559,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 3.57,
|
|
"learning_rate": 9.969442240398474e-06,
|
|
"loss": 3.0437,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 3.57,
|
|
"learning_rate": 9.94152027253225e-06,
|
|
"loss": 3.0383,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 9.913627753490084e-06,
|
|
"loss": 3.0485,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 9.885764737819444e-06,
|
|
"loss": 3.0746,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 9.857931280010094e-06,
|
|
"loss": 2.9857,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 9.830127434493997e-06,
|
|
"loss": 3.0314,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 3.58,
|
|
"learning_rate": 9.802353255645202e-06,
|
|
"loss": 3.0557,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 3.59,
|
|
"learning_rate": 9.77460879777975e-06,
|
|
"loss": 3.0553,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 3.59,
|
|
"learning_rate": 9.746894115155547e-06,
|
|
"loss": 3.0923,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 3.59,
|
|
"learning_rate": 9.719209261972279e-06,
|
|
"loss": 3.1046,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 3.59,
|
|
"learning_rate": 9.691554292371285e-06,
|
|
"loss": 3.0573,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 3.59,
|
|
"learning_rate": 9.66392926043548e-06,
|
|
"loss": 3.0554,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"learning_rate": 9.636334220189216e-06,
|
|
"loss": 3.0426,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"learning_rate": 9.608769225598193e-06,
|
|
"loss": 3.0102,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"learning_rate": 9.581234330569375e-06,
|
|
"loss": 3.0427,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 3.6,
|
|
"learning_rate": 9.553729588950838e-06,
|
|
"loss": 3.0588,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 3.61,
|
|
"learning_rate": 9.526255054531694e-06,
|
|
"loss": 3.0561,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 3.61,
|
|
"learning_rate": 9.498810781041986e-06,
|
|
"loss": 3.0464,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 3.61,
|
|
"learning_rate": 9.471396822152579e-06,
|
|
"loss": 3.0462,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 3.61,
|
|
"learning_rate": 9.444013231475043e-06,
|
|
"loss": 3.0231,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 3.61,
|
|
"learning_rate": 9.41666006256156e-06,
|
|
"loss": 3.0708,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 3.62,
|
|
"learning_rate": 9.389337368904849e-06,
|
|
"loss": 3.0786,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 3.62,
|
|
"learning_rate": 9.362045203937989e-06,
|
|
"loss": 3.0496,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 3.62,
|
|
"learning_rate": 9.334783621034377e-06,
|
|
"loss": 3.0493,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 3.62,
|
|
"learning_rate": 9.3075526735076e-06,
|
|
"loss": 3.0818,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 3.62,
|
|
"learning_rate": 9.280352414611332e-06,
|
|
"loss": 3.0362,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 3.63,
|
|
"learning_rate": 9.25318289753923e-06,
|
|
"loss": 3.0442,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 3.63,
|
|
"learning_rate": 9.22604417542484e-06,
|
|
"loss": 3.0519,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 3.63,
|
|
"learning_rate": 9.19893630134147e-06,
|
|
"loss": 3.0601,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 3.63,
|
|
"learning_rate": 9.171859328302112e-06,
|
|
"loss": 3.0114,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 9.144813309259328e-06,
|
|
"loss": 3.0107,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 9.117798297105135e-06,
|
|
"loss": 3.0195,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 9.09081434467092e-06,
|
|
"loss": 3.0925,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 9.063861504727326e-06,
|
|
"loss": 3.0653,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 9.03693982998415e-06,
|
|
"loss": 3.0273,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 9.010049373090252e-06,
|
|
"loss": 3.0293,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 8.983190186633422e-06,
|
|
"loss": 3.0186,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 8.956362323140307e-06,
|
|
"loss": 3.038,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 8.929565835076312e-06,
|
|
"loss": 3.0942,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 3.66,
|
|
"learning_rate": 8.90280077484546e-06,
|
|
"loss": 3.0066,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 3.66,
|
|
"learning_rate": 8.876067194790325e-06,
|
|
"loss": 3.0067,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 3.66,
|
|
"learning_rate": 8.849365147191915e-06,
|
|
"loss": 3.0401,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 3.66,
|
|
"learning_rate": 8.822694684269569e-06,
|
|
"loss": 3.0869,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 3.66,
|
|
"learning_rate": 8.796055858180862e-06,
|
|
"loss": 3.0214,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 3.67,
|
|
"learning_rate": 8.7694487210215e-06,
|
|
"loss": 3.0488,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 3.67,
|
|
"learning_rate": 8.742873324825213e-06,
|
|
"loss": 3.0232,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 3.67,
|
|
"learning_rate": 8.716329721563662e-06,
|
|
"loss": 3.0763,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 3.67,
|
|
"learning_rate": 8.689817963146327e-06,
|
|
"loss": 3.0195,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 3.67,
|
|
"learning_rate": 8.663338101420414e-06,
|
|
"loss": 3.0491,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"learning_rate": 8.636890188170757e-06,
|
|
"loss": 3.0477,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"learning_rate": 8.610474275119702e-06,
|
|
"loss": 3.0163,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"learning_rate": 8.584090413927014e-06,
|
|
"loss": 3.0518,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 3.68,
|
|
"learning_rate": 8.557738656189784e-06,
|
|
"loss": 3.0356,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 3.69,
|
|
"learning_rate": 8.531419053442315e-06,
|
|
"loss": 3.0473,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 3.69,
|
|
"learning_rate": 8.505131657156032e-06,
|
|
"loss": 3.0072,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 3.69,
|
|
"learning_rate": 8.478876518739364e-06,
|
|
"loss": 3.0617,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 3.69,
|
|
"learning_rate": 8.45265368953767e-06,
|
|
"loss": 3.0753,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 3.69,
|
|
"learning_rate": 8.426463220833109e-06,
|
|
"loss": 3.0968,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 8.400305163844577e-06,
|
|
"loss": 3.1078,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 8.374179569727563e-06,
|
|
"loss": 3.0648,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 8.348086489574084e-06,
|
|
"loss": 3.0642,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 8.32202597441256e-06,
|
|
"loss": 3.0434,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 3.7,
|
|
"learning_rate": 8.295998075207736e-06,
|
|
"loss": 3.0191,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"learning_rate": 8.270002842860569e-06,
|
|
"loss": 3.041,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"learning_rate": 8.24404032820813e-06,
|
|
"loss": 3.0984,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"learning_rate": 8.218110582023512e-06,
|
|
"loss": 3.0511,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 3.71,
|
|
"learning_rate": 8.192213655015704e-06,
|
|
"loss": 3.0083,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 3.72,
|
|
"learning_rate": 8.166349597829551e-06,
|
|
"loss": 3.0806,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 3.72,
|
|
"learning_rate": 8.140518461045588e-06,
|
|
"loss": 3.0186,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 3.72,
|
|
"learning_rate": 8.114720295179973e-06,
|
|
"loss": 3.0383,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 3.72,
|
|
"learning_rate": 8.088955150684393e-06,
|
|
"loss": 3.0436,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 3.72,
|
|
"learning_rate": 8.063223077945956e-06,
|
|
"loss": 3.0402,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 8.037524127287083e-06,
|
|
"loss": 3.0254,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 8.011858348965435e-06,
|
|
"loss": 3.0468,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 7.98622579317379e-06,
|
|
"loss": 3.033,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 7.960626510039965e-06,
|
|
"loss": 3.0599,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 3.73,
|
|
"learning_rate": 7.935060549626696e-06,
|
|
"loss": 3.0692,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 3.74,
|
|
"learning_rate": 7.909527961931562e-06,
|
|
"loss": 3.1465,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 3.74,
|
|
"learning_rate": 7.884028796886863e-06,
|
|
"loss": 3.0744,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 3.74,
|
|
"learning_rate": 7.858563104359565e-06,
|
|
"loss": 3.0674,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 3.74,
|
|
"learning_rate": 7.833130934151145e-06,
|
|
"loss": 3.0385,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 7.807732335997537e-06,
|
|
"loss": 3.0108,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 7.782367359569015e-06,
|
|
"loss": 3.0098,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 7.757036054470108e-06,
|
|
"loss": 3.0531,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 7.731738470239483e-06,
|
|
"loss": 3.0734,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"learning_rate": 7.70647465634988e-06,
|
|
"loss": 3.043,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 3.76,
|
|
"learning_rate": 7.681244662207979e-06,
|
|
"loss": 3.0281,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 3.76,
|
|
"learning_rate": 7.656048537154336e-06,
|
|
"loss": 2.9923,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 3.76,
|
|
"learning_rate": 7.63088633046326e-06,
|
|
"loss": 3.0395,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 3.76,
|
|
"learning_rate": 7.605758091342735e-06,
|
|
"loss": 2.9974,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 3.77,
|
|
"learning_rate": 7.580663868934315e-06,
|
|
"loss": 3.0726,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 3.77,
|
|
"learning_rate": 7.555603712313028e-06,
|
|
"loss": 3.1387,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 3.77,
|
|
"learning_rate": 7.530577670487288e-06,
|
|
"loss": 3.008,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 3.77,
|
|
"learning_rate": 7.505585792398781e-06,
|
|
"loss": 3.0526,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 3.77,
|
|
"learning_rate": 7.480628126922396e-06,
|
|
"loss": 3.1219,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 7.455704722866105e-06,
|
|
"loss": 3.044,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 7.430815628970881e-06,
|
|
"loss": 3.0597,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 7.405960893910599e-06,
|
|
"loss": 3.0511,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 7.381140566291928e-06,
|
|
"loss": 3.0602,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 3.78,
|
|
"learning_rate": 7.35635469465428e-06,
|
|
"loss": 3.0561,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 3.79,
|
|
"learning_rate": 7.331603327469658e-06,
|
|
"loss": 3.0664,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 3.79,
|
|
"learning_rate": 7.306886513142589e-06,
|
|
"loss": 3.0127,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 3.79,
|
|
"learning_rate": 7.282204300010034e-06,
|
|
"loss": 3.0184,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 3.79,
|
|
"learning_rate": 7.2575567363412894e-06,
|
|
"loss": 3.0093,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 7.232943870337877e-06,
|
|
"loss": 3.1023,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 7.208365750133478e-06,
|
|
"loss": 3.0619,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 7.1838224237938125e-06,
|
|
"loss": 3.0545,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 7.159313939316564e-06,
|
|
"loss": 3.137,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 3.8,
|
|
"learning_rate": 7.134840344631275e-06,
|
|
"loss": 3.0705,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 3.81,
|
|
"learning_rate": 7.110401687599255e-06,
|
|
"loss": 3.0324,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 3.81,
|
|
"learning_rate": 7.08599801601349e-06,
|
|
"loss": 3.0266,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 3.81,
|
|
"learning_rate": 7.061629377598542e-06,
|
|
"loss": 3.0595,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 3.81,
|
|
"learning_rate": 7.037295820010481e-06,
|
|
"loss": 3.0355,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 3.81,
|
|
"learning_rate": 7.012997390836745e-06,
|
|
"loss": 2.9801,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 3.82,
|
|
"learning_rate": 6.988734137596095e-06,
|
|
"loss": 3.0169,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 3.82,
|
|
"learning_rate": 6.964506107738486e-06,
|
|
"loss": 3.0667,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 3.82,
|
|
"learning_rate": 6.940313348644994e-06,
|
|
"loss": 3.0117,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 3.82,
|
|
"learning_rate": 6.916155907627725e-06,
|
|
"loss": 3.0461,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 6.892033831929703e-06,
|
|
"loss": 3.0584,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 6.8679471687247975e-06,
|
|
"loss": 3.0127,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 6.843895965117636e-06,
|
|
"loss": 3.0464,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 6.819880268143483e-06,
|
|
"loss": 3.0342,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 3.83,
|
|
"learning_rate": 6.795900124768168e-06,
|
|
"loss": 3.0692,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"learning_rate": 6.771955581887998e-06,
|
|
"loss": 3.077,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"learning_rate": 6.748046686329648e-06,
|
|
"loss": 3.0021,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"learning_rate": 6.724173484850094e-06,
|
|
"loss": 3.0601,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"learning_rate": 6.700336024136491e-06,
|
|
"loss": 3.046,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 3.84,
|
|
"learning_rate": 6.676534350806116e-06,
|
|
"loss": 3.0126,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 3.85,
|
|
"learning_rate": 6.652768511406246e-06,
|
|
"loss": 3.0162,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 3.85,
|
|
"learning_rate": 6.629038552414083e-06,
|
|
"loss": 3.0346,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 3.85,
|
|
"learning_rate": 6.605344520236662e-06,
|
|
"loss": 3.0149,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 3.85,
|
|
"learning_rate": 6.5816864612107595e-06,
|
|
"loss": 3.0577,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 6.5580644216028e-06,
|
|
"loss": 3.029,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 6.534478447608766e-06,
|
|
"loss": 3.0367,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 6.510928585354112e-06,
|
|
"loss": 3.0446,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 6.487414880893666e-06,
|
|
"loss": 3.0617,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 3.86,
|
|
"learning_rate": 6.463937380211555e-06,
|
|
"loss": 3.0387,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 3.87,
|
|
"learning_rate": 6.440496129221094e-06,
|
|
"loss": 3.027,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 3.87,
|
|
"learning_rate": 6.417091173764711e-06,
|
|
"loss": 3.0221,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 3.87,
|
|
"learning_rate": 6.393722559613849e-06,
|
|
"loss": 3.0637,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 3.87,
|
|
"learning_rate": 6.370390332468898e-06,
|
|
"loss": 3.0915,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 6.347094537959067e-06,
|
|
"loss": 2.9943,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 6.323835221642327e-06,
|
|
"loss": 3.0143,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 6.30061242900531e-06,
|
|
"loss": 3.0351,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 6.277426205463219e-06,
|
|
"loss": 3.0477,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 3.88,
|
|
"learning_rate": 6.254276596359742e-06,
|
|
"loss": 2.9847,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 3.89,
|
|
"learning_rate": 6.231163646966967e-06,
|
|
"loss": 3.0042,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 3.89,
|
|
"learning_rate": 6.208087402485283e-06,
|
|
"loss": 3.0593,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 3.89,
|
|
"learning_rate": 6.1850479080432984e-06,
|
|
"loss": 3.0764,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 3.89,
|
|
"learning_rate": 6.162045208697759e-06,
|
|
"loss": 3.115,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 3.89,
|
|
"learning_rate": 6.1390793494334434e-06,
|
|
"loss": 3.0792,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"learning_rate": 6.11615037516309e-06,
|
|
"loss": 3.0233,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"learning_rate": 6.093258330727306e-06,
|
|
"loss": 3.041,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"learning_rate": 6.070403260894472e-06,
|
|
"loss": 3.0621,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 3.9,
|
|
"learning_rate": 6.047585210360662e-06,
|
|
"loss": 3.0713,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 3.91,
|
|
"learning_rate": 6.024804223749556e-06,
|
|
"loss": 3.0191,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 3.91,
|
|
"learning_rate": 6.002060345612348e-06,
|
|
"loss": 3.0548,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 3.91,
|
|
"learning_rate": 5.979353620427655e-06,
|
|
"loss": 3.0386,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 3.91,
|
|
"learning_rate": 5.956684092601458e-06,
|
|
"loss": 3.1075,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 3.91,
|
|
"learning_rate": 5.9340518064669756e-06,
|
|
"loss": 3.0638,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"learning_rate": 5.911456806284596e-06,
|
|
"loss": 3.0404,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"learning_rate": 5.8888991362417965e-06,
|
|
"loss": 3.0817,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"learning_rate": 5.866378840453044e-06,
|
|
"loss": 3.0259,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"learning_rate": 5.843895962959719e-06,
|
|
"loss": 3.0482,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 3.92,
|
|
"learning_rate": 5.821450547730023e-06,
|
|
"loss": 3.0094,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 3.93,
|
|
"learning_rate": 5.799042638658908e-06,
|
|
"loss": 3.0122,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 3.93,
|
|
"learning_rate": 5.776672279567958e-06,
|
|
"loss": 2.9955,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 3.93,
|
|
"learning_rate": 5.7543395142053344e-06,
|
|
"loss": 3.0325,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 3.93,
|
|
"learning_rate": 5.732044386245677e-06,
|
|
"loss": 3.0582,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 3.94,
|
|
"learning_rate": 5.709786939290021e-06,
|
|
"loss": 3.0594,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 3.94,
|
|
"learning_rate": 5.687567216865711e-06,
|
|
"loss": 3.0677,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 3.94,
|
|
"learning_rate": 5.665385262426315e-06,
|
|
"loss": 3.0355,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 3.94,
|
|
"learning_rate": 5.643241119351544e-06,
|
|
"loss": 3.0319,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 3.94,
|
|
"learning_rate": 5.621134830947164e-06,
|
|
"loss": 3.0383,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 3.95,
|
|
"learning_rate": 5.5990664404449055e-06,
|
|
"loss": 3.0282,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 3.95,
|
|
"learning_rate": 5.577035991002391e-06,
|
|
"loss": 3.0562,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 3.95,
|
|
"learning_rate": 5.555043525703041e-06,
|
|
"loss": 3.0665,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 3.95,
|
|
"learning_rate": 5.533089087555995e-06,
|
|
"loss": 3.0059,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"learning_rate": 5.511172719496025e-06,
|
|
"loss": 3.013,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"learning_rate": 5.489294464383451e-06,
|
|
"loss": 2.9888,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"learning_rate": 5.4674543650040515e-06,
|
|
"loss": 3.0788,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"learning_rate": 5.445652464069007e-06,
|
|
"loss": 3.0855,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 3.96,
|
|
"learning_rate": 5.423888804214775e-06,
|
|
"loss": 3.0896,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 5.402163428003038e-06,
|
|
"loss": 2.9985,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 5.380476377920604e-06,
|
|
"loss": 3.0263,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 5.358827696379334e-06,
|
|
"loss": 3.0636,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 5.3372174257160515e-06,
|
|
"loss": 2.9904,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 3.97,
|
|
"learning_rate": 5.315645608192463e-06,
|
|
"loss": 3.0567,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 5.2941122859950785e-06,
|
|
"loss": 3.0682,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 5.272617501235117e-06,
|
|
"loss": 3.0177,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 5.251161295948443e-06,
|
|
"loss": 3.0723,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 5.229743712095467e-06,
|
|
"loss": 3.0442,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 3.99,
|
|
"learning_rate": 5.208364791561071e-06,
|
|
"loss": 3.0286,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 3.99,
|
|
"learning_rate": 5.187024576154526e-06,
|
|
"loss": 3.0554,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 3.99,
|
|
"learning_rate": 5.16572310760941e-06,
|
|
"loss": 3.0437,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 3.99,
|
|
"learning_rate": 5.1444604275835305e-06,
|
|
"loss": 2.9838,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 3.99,
|
|
"learning_rate": 5.123236577658835e-06,
|
|
"loss": 3.0219,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 5.1020515993413315e-06,
|
|
"loss": 3.0591,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 5.080905534061014e-06,
|
|
"loss": 3.0456,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_loss": 3.0454726219177246,
|
|
"eval_runtime": 194.385,
|
|
"eval_samples_per_second": 762.507,
|
|
"eval_steps_per_second": 23.829,
|
|
"step": 18528
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 5.059798423171777e-06,
|
|
"loss": 2.9865,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 5.038730307951331e-06,
|
|
"loss": 3.0307,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"learning_rate": 5.0177012296011185e-06,
|
|
"loss": 3.0563,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 4.01,
|
|
"learning_rate": 4.996711229246268e-06,
|
|
"loss": 3.0752,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 4.01,
|
|
"learning_rate": 4.975760347935454e-06,
|
|
"loss": 3.0579,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 4.01,
|
|
"learning_rate": 4.954848626640865e-06,
|
|
"loss": 3.07,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 4.01,
|
|
"learning_rate": 4.933976106258104e-06,
|
|
"loss": 3.0167,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 4.02,
|
|
"learning_rate": 4.913142827606107e-06,
|
|
"loss": 3.005,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 4.02,
|
|
"learning_rate": 4.892348831427077e-06,
|
|
"loss": 3.0494,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 4.02,
|
|
"learning_rate": 4.871594158386386e-06,
|
|
"loss": 3.0751,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 4.02,
|
|
"learning_rate": 4.850878849072505e-06,
|
|
"loss": 3.0289,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 4.02,
|
|
"learning_rate": 4.830202943996937e-06,
|
|
"loss": 3.0331,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 4.809566483594108e-06,
|
|
"loss": 2.9982,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 4.788969508221314e-06,
|
|
"loss": 3.0489,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 4.768412058158631e-06,
|
|
"loss": 3.0287,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 4.747894173608839e-06,
|
|
"loss": 3.0382,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 4.03,
|
|
"learning_rate": 4.727415894697338e-06,
|
|
"loss": 3.0465,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 4.04,
|
|
"learning_rate": 4.706977261472076e-06,
|
|
"loss": 3.0746,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 4.04,
|
|
"learning_rate": 4.6865783139034756e-06,
|
|
"loss": 3.048,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 4.04,
|
|
"learning_rate": 4.666219091884338e-06,
|
|
"loss": 3.0308,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 4.04,
|
|
"learning_rate": 4.645899635229786e-06,
|
|
"loss": 3.0017,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 4.625619983677168e-06,
|
|
"loss": 3.0251,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 4.605380176885987e-06,
|
|
"loss": 3.0564,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 4.585180254437838e-06,
|
|
"loss": 3.0616,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 4.565020255836305e-06,
|
|
"loss": 3.0099,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 4.05,
|
|
"learning_rate": 4.544900220506901e-06,
|
|
"loss": 3.0518,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 4.06,
|
|
"learning_rate": 4.524820187796977e-06,
|
|
"loss": 3.0556,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 4.06,
|
|
"learning_rate": 4.504780196975664e-06,
|
|
"loss": 3.0582,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 4.06,
|
|
"learning_rate": 4.484780287233778e-06,
|
|
"loss": 3.0635,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 4.06,
|
|
"learning_rate": 4.464820497683758e-06,
|
|
"loss": 3.0223,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 4.07,
|
|
"learning_rate": 4.44490086735958e-06,
|
|
"loss": 3.0759,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 4.07,
|
|
"learning_rate": 4.425021435216684e-06,
|
|
"loss": 3.0536,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 4.07,
|
|
"learning_rate": 4.405182240131891e-06,
|
|
"loss": 3.0062,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 4.07,
|
|
"learning_rate": 4.385383320903344e-06,
|
|
"loss": 3.0171,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 4.07,
|
|
"learning_rate": 4.3656247162504105e-06,
|
|
"loss": 3.0007,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.345906464813628e-06,
|
|
"loss": 3.046,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.326228605154611e-06,
|
|
"loss": 3.0513,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.3065911757559806e-06,
|
|
"loss": 3.0251,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.286994215021301e-06,
|
|
"loss": 3.023,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 4.08,
|
|
"learning_rate": 4.267437761274987e-06,
|
|
"loss": 3.0176,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 4.09,
|
|
"learning_rate": 4.247921852762235e-06,
|
|
"loss": 3.0074,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 4.09,
|
|
"learning_rate": 4.228446527648955e-06,
|
|
"loss": 3.0077,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 4.09,
|
|
"learning_rate": 4.209011824021691e-06,
|
|
"loss": 2.9964,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 4.09,
|
|
"learning_rate": 4.189617779887539e-06,
|
|
"loss": 3.0993,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 4.170264433174093e-06,
|
|
"loss": 3.0879,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 4.150951821729349e-06,
|
|
"loss": 3.0127,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 4.13167998332164e-06,
|
|
"loss": 3.0037,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 4.112448955639561e-06,
|
|
"loss": 3.0264,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 4.1,
|
|
"learning_rate": 4.093258776291903e-06,
|
|
"loss": 3.0338,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 4.11,
|
|
"learning_rate": 4.0741094828075646e-06,
|
|
"loss": 2.9888,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 4.11,
|
|
"learning_rate": 4.055001112635492e-06,
|
|
"loss": 3.0405,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 4.11,
|
|
"learning_rate": 4.035933703144598e-06,
|
|
"loss": 3.0779,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 4.11,
|
|
"learning_rate": 4.016907291623695e-06,
|
|
"loss": 3.0088,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 4.11,
|
|
"learning_rate": 3.997921915281413e-06,
|
|
"loss": 3.0392,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 4.12,
|
|
"learning_rate": 3.978977611246137e-06,
|
|
"loss": 3.0591,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 4.12,
|
|
"learning_rate": 3.960074416565929e-06,
|
|
"loss": 3.0521,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 4.12,
|
|
"learning_rate": 3.941212368208447e-06,
|
|
"loss": 3.0649,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 4.12,
|
|
"learning_rate": 3.922391503060902e-06,
|
|
"loss": 3.0837,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.903611857929951e-06,
|
|
"loss": 3.0604,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.884873469541642e-06,
|
|
"loss": 3.0406,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.866176374541338e-06,
|
|
"loss": 3.0361,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.847520609493657e-06,
|
|
"loss": 3.0607,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 4.13,
|
|
"learning_rate": 3.828906210882377e-06,
|
|
"loss": 3.0639,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 4.14,
|
|
"learning_rate": 3.810333215110387e-06,
|
|
"loss": 3.0666,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 4.14,
|
|
"learning_rate": 3.7918016584996136e-06,
|
|
"loss": 3.0383,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 4.14,
|
|
"learning_rate": 3.7733115772909307e-06,
|
|
"loss": 3.0375,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 4.14,
|
|
"learning_rate": 3.7548630076441076e-06,
|
|
"loss": 3.0097,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.736455985637735e-06,
|
|
"loss": 3.0756,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.7180905472691457e-06,
|
|
"loss": 3.0592,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.699766728454351e-06,
|
|
"loss": 3.0135,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.681484565027979e-06,
|
|
"loss": 3.0452,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 3.6632440927431814e-06,
|
|
"loss": 3.0594,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"learning_rate": 3.645045347271589e-06,
|
|
"loss": 3.0343,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"learning_rate": 3.6268883642032236e-06,
|
|
"loss": 2.9991,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"learning_rate": 3.6087731790464376e-06,
|
|
"loss": 3.0261,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"learning_rate": 3.590699827227842e-06,
|
|
"loss": 3.0457,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 4.16,
|
|
"learning_rate": 3.5726683440922394e-06,
|
|
"loss": 3.0501,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 4.17,
|
|
"learning_rate": 3.554678764902544e-06,
|
|
"loss": 3.0199,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 4.17,
|
|
"learning_rate": 3.5367311248397343e-06,
|
|
"loss": 3.0318,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 4.17,
|
|
"learning_rate": 3.5188254590027615e-06,
|
|
"loss": 2.9988,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 4.17,
|
|
"learning_rate": 3.5009618024084924e-06,
|
|
"loss": 3.0363,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.483140189991646e-06,
|
|
"loss": 3.036,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.4653606566047077e-06,
|
|
"loss": 3.0258,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.44762323701788e-06,
|
|
"loss": 3.0607,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.429927965919e-06,
|
|
"loss": 3.0373,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 4.18,
|
|
"learning_rate": 3.4122748779134905e-06,
|
|
"loss": 3.0332,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 3.3946640075242675e-06,
|
|
"loss": 3.0379,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 3.377095389191684e-06,
|
|
"loss": 3.0093,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 3.3595690572734733e-06,
|
|
"loss": 3.0827,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 3.3420850460446627e-06,
|
|
"loss": 3.0456,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 4.19,
|
|
"learning_rate": 3.324643389697521e-06,
|
|
"loss": 3.0801,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"learning_rate": 3.307244122341488e-06,
|
|
"loss": 3.0707,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"learning_rate": 3.289887278003101e-06,
|
|
"loss": 3.0354,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"learning_rate": 3.2725728906259357e-06,
|
|
"loss": 3.0557,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 4.2,
|
|
"learning_rate": 3.2553009940705396e-06,
|
|
"loss": 3.0726,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 4.21,
|
|
"learning_rate": 3.2380716221143636e-06,
|
|
"loss": 3.0545,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 4.21,
|
|
"learning_rate": 3.2208848084516933e-06,
|
|
"loss": 3.0324,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 4.21,
|
|
"learning_rate": 3.203740586693588e-06,
|
|
"loss": 3.0199,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 4.21,
|
|
"learning_rate": 3.186638990367813e-06,
|
|
"loss": 3.0257,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 4.21,
|
|
"learning_rate": 3.1695800529187747e-06,
|
|
"loss": 3.0524,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 4.22,
|
|
"learning_rate": 3.152563807707451e-06,
|
|
"loss": 3.0313,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 4.22,
|
|
"learning_rate": 3.1355902880113308e-06,
|
|
"loss": 3.0431,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 4.22,
|
|
"learning_rate": 3.118659527024356e-06,
|
|
"loss": 3.0687,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 4.22,
|
|
"learning_rate": 3.1017715578568362e-06,
|
|
"loss": 3.0065,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 4.22,
|
|
"learning_rate": 3.084926413535402e-06,
|
|
"loss": 2.9737,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 4.23,
|
|
"learning_rate": 3.068124127002936e-06,
|
|
"loss": 3.0437,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 4.23,
|
|
"learning_rate": 3.0513647311185e-06,
|
|
"loss": 3.0656,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 4.23,
|
|
"learning_rate": 3.034648258657283e-06,
|
|
"loss": 2.9954,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 4.23,
|
|
"learning_rate": 3.0179747423105255e-06,
|
|
"loss": 3.0332,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"learning_rate": 3.001344214685478e-06,
|
|
"loss": 3.0611,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"learning_rate": 2.984756708305303e-06,
|
|
"loss": 3.022,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"learning_rate": 2.9682122556090373e-06,
|
|
"loss": 3.0146,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"learning_rate": 2.951710888951517e-06,
|
|
"loss": 3.0682,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 4.24,
|
|
"learning_rate": 2.9352526406033227e-06,
|
|
"loss": 2.9759,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"learning_rate": 2.918837542750705e-06,
|
|
"loss": 3.0547,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"learning_rate": 2.9024656274955377e-06,
|
|
"loss": 3.0049,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"learning_rate": 2.8861369268552345e-06,
|
|
"loss": 3.0891,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 4.25,
|
|
"learning_rate": 2.8698514727627053e-06,
|
|
"loss": 3.0416,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 4.26,
|
|
"learning_rate": 2.8536092970662785e-06,
|
|
"loss": 3.067,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 4.26,
|
|
"learning_rate": 2.8374104315296577e-06,
|
|
"loss": 3.0185,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 4.26,
|
|
"learning_rate": 2.821254907831833e-06,
|
|
"loss": 3.0962,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 4.26,
|
|
"learning_rate": 2.8051427575670445e-06,
|
|
"loss": 3.0583,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 4.26,
|
|
"learning_rate": 2.7890740122447077e-06,
|
|
"loss": 3.0327,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 4.27,
|
|
"learning_rate": 2.773048703289352e-06,
|
|
"loss": 3.0717,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 4.27,
|
|
"learning_rate": 2.757066862040561e-06,
|
|
"loss": 3.074,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 4.27,
|
|
"learning_rate": 2.741128519752911e-06,
|
|
"loss": 3.0281,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 4.27,
|
|
"learning_rate": 2.725233707595917e-06,
|
|
"loss": 3.0431,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 4.27,
|
|
"learning_rate": 2.7093824566539613e-06,
|
|
"loss": 2.9766,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"learning_rate": 2.6935747979262296e-06,
|
|
"loss": 3.0196,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"learning_rate": 2.6778107623266683e-06,
|
|
"loss": 3.0266,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"learning_rate": 2.662090380683907e-06,
|
|
"loss": 2.9949,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 4.28,
|
|
"learning_rate": 2.646413683741203e-06,
|
|
"loss": 3.0544,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 2.630780702156388e-06,
|
|
"loss": 3.0302,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 2.6151914665017985e-06,
|
|
"loss": 3.0381,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 2.59964600726422e-06,
|
|
"loss": 3.0418,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 2.584144354844831e-06,
|
|
"loss": 3.0066,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 4.29,
|
|
"learning_rate": 2.568686539559134e-06,
|
|
"loss": 3.0644,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 2.5532725916369073e-06,
|
|
"loss": 2.9938,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 2.5379025412221396e-06,
|
|
"loss": 3.0974,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 2.5225764183729766e-06,
|
|
"loss": 3.0452,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 2.5072942530616457e-06,
|
|
"loss": 2.9947,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 4.3,
|
|
"learning_rate": 2.4920560751744234e-06,
|
|
"loss": 3.0233,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 4.31,
|
|
"learning_rate": 2.4768619145115525e-06,
|
|
"loss": 3.0352,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 4.31,
|
|
"learning_rate": 2.4617118007872003e-06,
|
|
"loss": 3.0504,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 4.31,
|
|
"learning_rate": 2.446605763629398e-06,
|
|
"loss": 3.0252,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 4.31,
|
|
"learning_rate": 2.431543832579966e-06,
|
|
"loss": 3.0409,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 2.4165260370944797e-06,
|
|
"loss": 3.0288,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 2.401552406542207e-06,
|
|
"loss": 3.0469,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 2.3866229702060317e-06,
|
|
"loss": 3.069,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 2.371737757282419e-06,
|
|
"loss": 3.0277,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 2.356896796881347e-06,
|
|
"loss": 3.0241,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 4.33,
|
|
"learning_rate": 2.3421001180262527e-06,
|
|
"loss": 3.0497,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 4.33,
|
|
"learning_rate": 2.3273477496539713e-06,
|
|
"loss": 2.999,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 4.33,
|
|
"learning_rate": 2.31263972061469e-06,
|
|
"loss": 3.0518,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 4.33,
|
|
"learning_rate": 2.2979760596718745e-06,
|
|
"loss": 3.0747,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 4.34,
|
|
"learning_rate": 2.2833567955022394e-06,
|
|
"loss": 2.9885,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 4.34,
|
|
"learning_rate": 2.2687819566956592e-06,
|
|
"loss": 3.0485,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 4.34,
|
|
"learning_rate": 2.2542515717551336e-06,
|
|
"loss": 3.0665,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 4.34,
|
|
"learning_rate": 2.23976566909673e-06,
|
|
"loss": 3.071,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 4.34,
|
|
"learning_rate": 2.225324277049526e-06,
|
|
"loss": 3.0072,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 2.210927423855547e-06,
|
|
"loss": 3.0289,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 2.1965751376697208e-06,
|
|
"loss": 3.0734,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 2.182267446559816e-06,
|
|
"loss": 3.03,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 2.1680043785063913e-06,
|
|
"loss": 2.9907,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 4.35,
|
|
"learning_rate": 2.1537859614027432e-06,
|
|
"loss": 3.0549,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 4.36,
|
|
"learning_rate": 2.139612223054843e-06,
|
|
"loss": 3.0406,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 4.36,
|
|
"learning_rate": 2.1254831911812794e-06,
|
|
"loss": 3.0698,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 4.36,
|
|
"learning_rate": 2.11139889341323e-06,
|
|
"loss": 3.0084,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 4.36,
|
|
"learning_rate": 2.0973593572943735e-06,
|
|
"loss": 3.0007,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 4.37,
|
|
"learning_rate": 2.083364610280861e-06,
|
|
"loss": 3.051,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 4.37,
|
|
"learning_rate": 2.069414679741244e-06,
|
|
"loss": 3.0615,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 4.37,
|
|
"learning_rate": 2.0555095929564337e-06,
|
|
"loss": 3.0019,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 4.37,
|
|
"learning_rate": 2.0416493771196476e-06,
|
|
"loss": 3.0209,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 4.37,
|
|
"learning_rate": 2.027834059336345e-06,
|
|
"loss": 3.0158,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 2.014063666624186e-06,
|
|
"loss": 3.0288,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 2.000338225912968e-06,
|
|
"loss": 3.042,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 1.9866577640445887e-06,
|
|
"loss": 3.0453,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 1.9730223077729757e-06,
|
|
"loss": 3.0062,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 4.38,
|
|
"learning_rate": 1.9594318837640457e-06,
|
|
"loss": 3.0103,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 4.39,
|
|
"learning_rate": 1.9458865185956466e-06,
|
|
"loss": 3.0085,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 4.39,
|
|
"learning_rate": 1.932386238757508e-06,
|
|
"loss": 3.0425,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 4.39,
|
|
"learning_rate": 1.918931070651195e-06,
|
|
"loss": 3.0467,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 4.39,
|
|
"learning_rate": 1.9055210405900443e-06,
|
|
"loss": 3.0108,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 1.89215617479912e-06,
|
|
"loss": 3.0835,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 1.8788364994151652e-06,
|
|
"loss": 3.0448,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 1.865562040486543e-06,
|
|
"loss": 3.0499,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 1.8523328239731907e-06,
|
|
"loss": 3.078,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 4.4,
|
|
"learning_rate": 1.8391488757465685e-06,
|
|
"loss": 2.996,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 1.8260102215896163e-06,
|
|
"loss": 3.0058,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 1.8129168871966834e-06,
|
|
"loss": 3.0447,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 1.7998688981734956e-06,
|
|
"loss": 3.0511,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 1.7868662800371e-06,
|
|
"loss": 3.0276,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 4.41,
|
|
"learning_rate": 1.7739090582158142e-06,
|
|
"loss": 2.9776,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 4.42,
|
|
"learning_rate": 1.7609972580491795e-06,
|
|
"loss": 3.0327,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 4.42,
|
|
"learning_rate": 1.7481309047879064e-06,
|
|
"loss": 3.0344,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 4.42,
|
|
"learning_rate": 1.7353100235938308e-06,
|
|
"loss": 3.0573,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 4.42,
|
|
"learning_rate": 1.722534639539858e-06,
|
|
"loss": 3.0018,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 1.709804777609922e-06,
|
|
"loss": 3.0297,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 1.6971204626989313e-06,
|
|
"loss": 3.0394,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 1.6844817196127155e-06,
|
|
"loss": 3.0197,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 1.6718885730679951e-06,
|
|
"loss": 3.0315,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 4.43,
|
|
"learning_rate": 1.659341047692309e-06,
|
|
"loss": 3.0369,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 4.44,
|
|
"learning_rate": 1.6468391680239836e-06,
|
|
"loss": 3.0255,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 4.44,
|
|
"learning_rate": 1.6343829585120763e-06,
|
|
"loss": 3.0541,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 4.44,
|
|
"learning_rate": 1.6219724435163314e-06,
|
|
"loss": 3.0399,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 4.44,
|
|
"learning_rate": 1.6096076473071347e-06,
|
|
"loss": 3.0176,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 1.5972885940654575e-06,
|
|
"loss": 3.0178,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 1.5850153078828146e-06,
|
|
"loss": 3.0553,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 1.5727878127612283e-06,
|
|
"loss": 3.023,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 1.5606061326131571e-06,
|
|
"loss": 3.0519,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 4.45,
|
|
"learning_rate": 1.5484702912614723e-06,
|
|
"loss": 2.9974,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 4.46,
|
|
"learning_rate": 1.5363803124393894e-06,
|
|
"loss": 3.0369,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 4.46,
|
|
"learning_rate": 1.5243362197904481e-06,
|
|
"loss": 3.0243,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 4.46,
|
|
"learning_rate": 1.5123380368684409e-06,
|
|
"loss": 3.0834,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 4.46,
|
|
"learning_rate": 1.5003857871373844e-06,
|
|
"loss": 2.9976,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 4.46,
|
|
"learning_rate": 1.488479493971462e-06,
|
|
"loss": 3.0548,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 4.47,
|
|
"learning_rate": 1.476619180654984e-06,
|
|
"loss": 2.9973,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 4.47,
|
|
"learning_rate": 1.4648048703823441e-06,
|
|
"loss": 3.0671,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 4.47,
|
|
"learning_rate": 1.453036586257972e-06,
|
|
"loss": 3.0396,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 4.47,
|
|
"learning_rate": 1.4413143512962802e-06,
|
|
"loss": 3.0585,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.4296381884216308e-06,
|
|
"loss": 3.0527,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.4180081204682867e-06,
|
|
"loss": 3.0313,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.4064241701803649e-06,
|
|
"loss": 3.0277,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.3948863602117945e-06,
|
|
"loss": 3.0199,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.3833947131262682e-06,
|
|
"loss": 3.0776,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 4.49,
|
|
"learning_rate": 1.3719492513972004e-06,
|
|
"loss": 3.0539,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 4.49,
|
|
"learning_rate": 1.3605499974076923e-06,
|
|
"loss": 3.0332,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 4.49,
|
|
"learning_rate": 1.34919697345047e-06,
|
|
"loss": 3.022,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 4.49,
|
|
"learning_rate": 1.3378902017278538e-06,
|
|
"loss": 3.047,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 4.49,
|
|
"learning_rate": 1.3266297043517172e-06,
|
|
"loss": 3.0031,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"learning_rate": 1.315415503343434e-06,
|
|
"loss": 3.0134,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"learning_rate": 1.3042476206338334e-06,
|
|
"loss": 3.0063,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"learning_rate": 1.2931260780631727e-06,
|
|
"loss": 3.0254,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"learning_rate": 1.2820508973810791e-06,
|
|
"loss": 3.0246,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 4.51,
|
|
"learning_rate": 1.2710221002465189e-06,
|
|
"loss": 3.0109,
|
|
"step": 20870
|
|
},
|
|
{
|
|
"epoch": 4.51,
|
|
"learning_rate": 1.2600397082277393e-06,
|
|
"loss": 3.0084,
|
|
"step": 20880
|
|
},
|
|
{
|
|
"epoch": 4.51,
|
|
"learning_rate": 1.2491037428022489e-06,
|
|
"loss": 3.0538,
|
|
"step": 20890
|
|
},
|
|
{
|
|
"epoch": 4.51,
|
|
"learning_rate": 1.2382142253567513e-06,
|
|
"loss": 3.0405,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 4.51,
|
|
"learning_rate": 1.2273711771871255e-06,
|
|
"loss": 3.0142,
|
|
"step": 20910
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"learning_rate": 1.2165746194983646e-06,
|
|
"loss": 3.046,
|
|
"step": 20920
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"learning_rate": 1.205824573404546e-06,
|
|
"loss": 3.0742,
|
|
"step": 20930
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"learning_rate": 1.1951210599287943e-06,
|
|
"loss": 2.9801,
|
|
"step": 20940
|
|
},
|
|
{
|
|
"epoch": 4.52,
|
|
"learning_rate": 1.184464100003224e-06,
|
|
"loss": 3.0401,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 1.1738537144689116e-06,
|
|
"loss": 3.0078,
|
|
"step": 20960
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 1.1632899240758554e-06,
|
|
"loss": 3.0189,
|
|
"step": 20970
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 1.152772749482925e-06,
|
|
"loss": 2.9714,
|
|
"step": 20980
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 1.1423022112578312e-06,
|
|
"loss": 3.0306,
|
|
"step": 20990
|
|
},
|
|
{
|
|
"epoch": 4.53,
|
|
"learning_rate": 1.1318783298770784e-06,
|
|
"loss": 3.0823,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 4.54,
|
|
"learning_rate": 1.1215011257259266e-06,
|
|
"loss": 3.0749,
|
|
"step": 21010
|
|
},
|
|
{
|
|
"epoch": 4.54,
|
|
"learning_rate": 1.111170619098356e-06,
|
|
"loss": 3.0678,
|
|
"step": 21020
|
|
},
|
|
{
|
|
"epoch": 4.54,
|
|
"learning_rate": 1.1008868301970205e-06,
|
|
"loss": 3.0619,
|
|
"step": 21030
|
|
},
|
|
{
|
|
"epoch": 4.54,
|
|
"learning_rate": 1.0906497791332127e-06,
|
|
"loss": 3.0172,
|
|
"step": 21040
|
|
},
|
|
{
|
|
"epoch": 4.54,
|
|
"learning_rate": 1.0804594859268213e-06,
|
|
"loss": 3.0106,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 4.55,
|
|
"learning_rate": 1.0703159705062998e-06,
|
|
"loss": 2.9689,
|
|
"step": 21060
|
|
},
|
|
{
|
|
"epoch": 4.55,
|
|
"learning_rate": 1.0602192527086163e-06,
|
|
"loss": 3.0145,
|
|
"step": 21070
|
|
},
|
|
{
|
|
"epoch": 4.55,
|
|
"learning_rate": 1.0501693522792205e-06,
|
|
"loss": 3.0696,
|
|
"step": 21080
|
|
},
|
|
{
|
|
"epoch": 4.55,
|
|
"learning_rate": 1.0401662888720049e-06,
|
|
"loss": 3.0166,
|
|
"step": 21090
|
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 1.0302100820492684e-06,
|
|
"loss": 3.0319,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 1.020300751281672e-06,
|
|
"loss": 3.0396,
|
|
"step": 21110
|
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 1.0104383159482062e-06,
|
|
"loss": 3.0682,
|
|
"step": 21120
|
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 1.0006227953361535e-06,
|
|
"loss": 3.0346,
|
|
"step": 21130
|
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 9.908542086410428e-07,
|
|
"loss": 3.0435,
|
|
"step": 21140
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 9.811325749666283e-07,
|
|
"loss": 2.9827,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 9.714579133248274e-07,
|
|
"loss": 3.0127,
|
|
"step": 21160
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 9.618302426357085e-07,
|
|
"loss": 3.0431,
|
|
"step": 21170
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 9.52249581727438e-07,
|
|
"loss": 3.0331,
|
|
"step": 21180
|
|
},
|
|
{
|
|
"epoch": 4.57,
|
|
"learning_rate": 9.427159493362481e-07,
|
|
"loss": 3.0888,
|
|
"step": 21190
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 9.332293641064055e-07,
|
|
"loss": 3.0355,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 9.237898445901672e-07,
|
|
"loss": 3.0158,
|
|
"step": 21210
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 9.143974092477386e-07,
|
|
"loss": 3.0575,
|
|
"step": 21220
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 9.050520764472658e-07,
|
|
"loss": 3.0386,
|
|
"step": 21230
|
|
},
|
|
{
|
|
"epoch": 4.59,
|
|
"learning_rate": 8.957538644647601e-07,
|
|
"loss": 3.0041,
|
|
"step": 21240
|
|
},
|
|
{
|
|
"epoch": 4.59,
|
|
"learning_rate": 8.865027914840923e-07,
|
|
"loss": 2.9995,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 4.59,
|
|
"learning_rate": 8.772988755969436e-07,
|
|
"loss": 3.034,
|
|
"step": 21260
|
|
},
|
|
{
|
|
"epoch": 4.59,
|
|
"learning_rate": 8.681421348027713e-07,
|
|
"loss": 3.0114,
|
|
"step": 21270
|
|
},
|
|
{
|
|
"epoch": 4.59,
|
|
"learning_rate": 8.590325870087817e-07,
|
|
"loss": 3.0492,
|
|
"step": 21280
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 8.499702500298829e-07,
|
|
"loss": 3.0267,
|
|
"step": 21290
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 8.409551415886591e-07,
|
|
"loss": 3.0024,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 8.319872793153355e-07,
|
|
"loss": 3.0313,
|
|
"step": 21310
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 8.230666807477333e-07,
|
|
"loss": 3.0003,
|
|
"step": 21320
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 8.141933633312504e-07,
|
|
"loss": 3.0757,
|
|
"step": 21330
|
|
},
|
|
{
|
|
"epoch": 4.61,
|
|
"learning_rate": 8.053673444188197e-07,
|
|
"loss": 3.0307,
|
|
"step": 21340
|
|
},
|
|
{
|
|
"epoch": 4.61,
|
|
"learning_rate": 7.965886412708707e-07,
|
|
"loss": 3.0076,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 4.61,
|
|
"learning_rate": 7.878572710553062e-07,
|
|
"loss": 3.0573,
|
|
"step": 21360
|
|
},
|
|
{
|
|
"epoch": 4.61,
|
|
"learning_rate": 7.791732508474592e-07,
|
|
"loss": 3.0526,
|
|
"step": 21370
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 7.7053659763007e-07,
|
|
"loss": 3.0403,
|
|
"step": 21380
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 7.61947328293236e-07,
|
|
"loss": 3.0372,
|
|
"step": 21390
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 7.534054596344015e-07,
|
|
"loss": 3.0721,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 7.44911008358301e-07,
|
|
"loss": 3.033,
|
|
"step": 21410
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 7.364639910769438e-07,
|
|
"loss": 3.0855,
|
|
"step": 21420
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 7.280644243095825e-07,
|
|
"loss": 3.046,
|
|
"step": 21430
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 7.197123244826603e-07,
|
|
"loss": 3.0089,
|
|
"step": 21440
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 7.11407707929801e-07,
|
|
"loss": 3.1103,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 4.63,
|
|
"learning_rate": 7.031505908917685e-07,
|
|
"loss": 3.0203,
|
|
"step": 21460
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 6.949409895164294e-07,
|
|
"loss": 3.0508,
|
|
"step": 21470
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 6.867789198587382e-07,
|
|
"loss": 3.0423,
|
|
"step": 21480
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 6.78664397880685e-07,
|
|
"loss": 3.0054,
|
|
"step": 21490
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 6.705974394512732e-07,
|
|
"loss": 3.0714,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 6.625780603464998e-07,
|
|
"loss": 3.082,
|
|
"step": 21510
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 6.54606276249306e-07,
|
|
"loss": 2.9895,
|
|
"step": 21520
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 6.466821027495573e-07,
|
|
"loss": 3.0519,
|
|
"step": 21530
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 6.388055553440103e-07,
|
|
"loss": 3.0362,
|
|
"step": 21540
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 6.309766494362823e-07,
|
|
"loss": 3.0237,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 6.231954003368178e-07,
|
|
"loss": 3.0413,
|
|
"step": 21560
|
|
},
|
|
{
|
|
"epoch": 4.66,
|
|
"learning_rate": 6.154618232628723e-07,
|
|
"loss": 3.0303,
|
|
"step": 21570
|
|
},
|
|
{
|
|
"epoch": 4.66,
|
|
"learning_rate": 6.077759333384614e-07,
|
|
"loss": 3.0201,
|
|
"step": 21580
|
|
},
|
|
{
|
|
"epoch": 4.66,
|
|
"learning_rate": 6.001377455943452e-07,
|
|
"loss": 3.0257,
|
|
"step": 21590
|
|
},
|
|
{
|
|
"epoch": 4.66,
|
|
"learning_rate": 5.925472749680028e-07,
|
|
"loss": 3.0073,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 4.67,
|
|
"learning_rate": 5.850045363035878e-07,
|
|
"loss": 3.08,
|
|
"step": 21610
|
|
},
|
|
{
|
|
"epoch": 4.67,
|
|
"learning_rate": 5.775095443519119e-07,
|
|
"loss": 3.0569,
|
|
"step": 21620
|
|
},
|
|
{
|
|
"epoch": 4.67,
|
|
"learning_rate": 5.700623137704087e-07,
|
|
"loss": 3.0756,
|
|
"step": 21630
|
|
},
|
|
{
|
|
"epoch": 4.67,
|
|
"learning_rate": 5.626628591231115e-07,
|
|
"loss": 2.9925,
|
|
"step": 21640
|
|
},
|
|
{
|
|
"epoch": 4.67,
|
|
"learning_rate": 5.553111948806227e-07,
|
|
"loss": 3.0152,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 5.48007335420081e-07,
|
|
"loss": 3.0232,
|
|
"step": 21660
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 5.407512950251381e-07,
|
|
"loss": 3.037,
|
|
"step": 21670
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 5.335430878859238e-07,
|
|
"loss": 3.0561,
|
|
"step": 21680
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 5.263827280990341e-07,
|
|
"loss": 3.042,
|
|
"step": 21690
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 5.192702296674873e-07,
|
|
"loss": 3.0384,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 4.69,
|
|
"learning_rate": 5.122056065007013e-07,
|
|
"loss": 3.0211,
|
|
"step": 21710
|
|
},
|
|
{
|
|
"epoch": 4.69,
|
|
"learning_rate": 5.051888724144693e-07,
|
|
"loss": 3.0336,
|
|
"step": 21720
|
|
},
|
|
{
|
|
"epoch": 4.69,
|
|
"learning_rate": 4.98220041130934e-07,
|
|
"loss": 3.0841,
|
|
"step": 21730
|
|
},
|
|
{
|
|
"epoch": 4.69,
|
|
"learning_rate": 4.912991262785521e-07,
|
|
"loss": 2.975,
|
|
"step": 21740
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 4.844261413920804e-07,
|
|
"loss": 3.0994,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 4.776010999125335e-07,
|
|
"loss": 3.0243,
|
|
"step": 21760
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 4.70824015187174e-07,
|
|
"loss": 2.9907,
|
|
"step": 21770
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 4.640949004694778e-07,
|
|
"loss": 3.0567,
|
|
"step": 21780
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 4.5741376891910717e-07,
|
|
"loss": 3.0485,
|
|
"step": 21790
|
|
},
|
|
{
|
|
"epoch": 4.71,
|
|
"learning_rate": 4.507806336018855e-07,
|
|
"loss": 3.0302,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 4.71,
|
|
"learning_rate": 4.44195507489778e-07,
|
|
"loss": 2.9561,
|
|
"step": 21810
|
|
},
|
|
{
|
|
"epoch": 4.71,
|
|
"learning_rate": 4.376584034608583e-07,
|
|
"loss": 3.0077,
|
|
"step": 21820
|
|
},
|
|
{
|
|
"epoch": 4.71,
|
|
"learning_rate": 4.3116933429928065e-07,
|
|
"loss": 3.0666,
|
|
"step": 21830
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 4.2472831269527724e-07,
|
|
"loss": 3.0037,
|
|
"step": 21840
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 4.183353512451055e-07,
|
|
"loss": 3.0503,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 4.1199046245103123e-07,
|
|
"loss": 3.0393,
|
|
"step": 21860
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 4.056936587213178e-07,
|
|
"loss": 3.0015,
|
|
"step": 21870
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 3.994449523701843e-07,
|
|
"loss": 3.0589,
|
|
"step": 21880
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 3.9324435561779726e-07,
|
|
"loss": 3.0104,
|
|
"step": 21890
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 3.8709188059022627e-07,
|
|
"loss": 3.0702,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 3.8098753931944396e-07,
|
|
"loss": 3.0534,
|
|
"step": 21910
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 3.7493134374329e-07,
|
|
"loss": 2.9991,
|
|
"step": 21920
|
|
},
|
|
{
|
|
"epoch": 4.73,
|
|
"learning_rate": 3.689233057054403e-07,
|
|
"loss": 3.0207,
|
|
"step": 21930
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 3.6296343695539915e-07,
|
|
"loss": 3.0348,
|
|
"step": 21940
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 3.5705174914847373e-07,
|
|
"loss": 3.0398,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 3.5118825384573283e-07,
|
|
"loss": 2.9959,
|
|
"step": 21960
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 3.453729625140151e-07,
|
|
"loss": 3.0605,
|
|
"step": 21970
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 3.396058865258789e-07,
|
|
"loss": 3.006,
|
|
"step": 21980
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 3.338870371595942e-07,
|
|
"loss": 2.9907,
|
|
"step": 21990
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 3.2821642559912044e-07,
|
|
"loss": 3.0006,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 3.225940629340757e-07,
|
|
"loss": 3.0078,
|
|
"step": 22010
|
|
},
|
|
{
|
|
"epoch": 4.75,
|
|
"learning_rate": 3.1701996015972847e-07,
|
|
"loss": 3.0516,
|
|
"step": 22020
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 3.114941281769618e-07,
|
|
"loss": 3.0552,
|
|
"step": 22030
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 3.060165777922619e-07,
|
|
"loss": 3.03,
|
|
"step": 22040
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 3.0058731971769894e-07,
|
|
"loss": 3.0479,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 2.952063645708908e-07,
|
|
"loss": 3.0336,
|
|
"step": 22060
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 2.8987372287499757e-07,
|
|
"loss": 3.0098,
|
|
"step": 22070
|
|
},
|
|
{
|
|
"epoch": 4.77,
|
|
"learning_rate": 2.8458940505870224e-07,
|
|
"loss": 3.0006,
|
|
"step": 22080
|
|
},
|
|
{
|
|
"epoch": 4.77,
|
|
"learning_rate": 2.793534214561744e-07,
|
|
"loss": 3.0279,
|
|
"step": 22090
|
|
},
|
|
{
|
|
"epoch": 4.77,
|
|
"learning_rate": 2.741657823070648e-07,
|
|
"loss": 3.0492,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 4.77,
|
|
"learning_rate": 2.690264977564777e-07,
|
|
"loss": 3.0646,
|
|
"step": 22110
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.639355778549568e-07,
|
|
"loss": 2.9875,
|
|
"step": 22120
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.588930325584632e-07,
|
|
"loss": 3.0109,
|
|
"step": 22130
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.5389887172835024e-07,
|
|
"loss": 3.0181,
|
|
"step": 22140
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.4895310513135263e-07,
|
|
"loss": 3.0555,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 2.4405574243956117e-07,
|
|
"loss": 3.006,
|
|
"step": 22160
|
|
},
|
|
{
|
|
"epoch": 4.79,
|
|
"learning_rate": 2.3920679323041207e-07,
|
|
"loss": 3.0852,
|
|
"step": 22170
|
|
},
|
|
{
|
|
"epoch": 4.79,
|
|
"learning_rate": 2.3440626698665312e-07,
|
|
"loss": 3.0004,
|
|
"step": 22180
|
|
},
|
|
{
|
|
"epoch": 4.79,
|
|
"learning_rate": 2.2965417309634695e-07,
|
|
"loss": 3.0681,
|
|
"step": 22190
|
|
},
|
|
{
|
|
"epoch": 4.79,
|
|
"learning_rate": 2.2495052085282908e-07,
|
|
"loss": 3.0736,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 4.79,
|
|
"learning_rate": 2.202953194547108e-07,
|
|
"loss": 2.9551,
|
|
"step": 22210
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 2.1568857800584042e-07,
|
|
"loss": 3.0467,
|
|
"step": 22220
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 2.1113030551530576e-07,
|
|
"loss": 3.0229,
|
|
"step": 22230
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 2.0662051089740396e-07,
|
|
"loss": 3.0546,
|
|
"step": 22240
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 2.0215920297162739e-07,
|
|
"loss": 2.9941,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 1.9774639046264698e-07,
|
|
"loss": 3.011,
|
|
"step": 22260
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 1.9338208200029295e-07,
|
|
"loss": 3.0244,
|
|
"step": 22270
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 1.890662861195408e-07,
|
|
"loss": 3.0414,
|
|
"step": 22280
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 1.847990112604947e-07,
|
|
"loss": 3.0303,
|
|
"step": 22290
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 1.805802657683653e-07,
|
|
"loss": 3.0888,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 4.82,
|
|
"learning_rate": 1.7641005789346134e-07,
|
|
"loss": 3.021,
|
|
"step": 22310
|
|
},
|
|
{
|
|
"epoch": 4.82,
|
|
"learning_rate": 1.722883957911703e-07,
|
|
"loss": 3.0116,
|
|
"step": 22320
|
|
},
|
|
{
|
|
"epoch": 4.82,
|
|
"learning_rate": 1.682152875219417e-07,
|
|
"loss": 2.9938,
|
|
"step": 22330
|
|
},
|
|
{
|
|
"epoch": 4.82,
|
|
"learning_rate": 1.6419074105126776e-07,
|
|
"loss": 3.042,
|
|
"step": 22340
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 1.6021476424967485e-07,
|
|
"loss": 3.0589,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 1.5628736489270713e-07,
|
|
"loss": 3.0067,
|
|
"step": 22360
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 1.5240855066090686e-07,
|
|
"loss": 3.0398,
|
|
"step": 22370
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 1.4857832913980075e-07,
|
|
"loss": 3.0409,
|
|
"step": 22380
|
|
},
|
|
{
|
|
"epoch": 4.83,
|
|
"learning_rate": 1.4479670781988863e-07,
|
|
"loss": 3.0694,
|
|
"step": 22390
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 1.4106369409662422e-07,
|
|
"loss": 3.0601,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 1.3737929527040117e-07,
|
|
"loss": 3.0397,
|
|
"step": 22410
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 1.3374351854654466e-07,
|
|
"loss": 3.0494,
|
|
"step": 22420
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 1.3015637103529487e-07,
|
|
"loss": 3.0838,
|
|
"step": 22430
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 1.266178597517792e-07,
|
|
"loss": 3.0292,
|
|
"step": 22440
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 1.2312799161602606e-07,
|
|
"loss": 3.0388,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 1.1968677345292612e-07,
|
|
"loss": 2.9938,
|
|
"step": 22460
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 1.1629421199222667e-07,
|
|
"loss": 3.1058,
|
|
"step": 22470
|
|
},
|
|
{
|
|
"epoch": 4.85,
|
|
"learning_rate": 1.1295031386853171e-07,
|
|
"loss": 3.0278,
|
|
"step": 22480
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 1.0965508562126581e-07,
|
|
"loss": 3.0636,
|
|
"step": 22490
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 1.0640853369467963e-07,
|
|
"loss": 3.0291,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 1.0321066443783056e-07,
|
|
"loss": 3.0297,
|
|
"step": 22510
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 1.0006148410456883e-07,
|
|
"loss": 3.0296,
|
|
"step": 22520
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 9.696099885352916e-08,
|
|
"loss": 3.0684,
|
|
"step": 22530
|
|
},
|
|
{
|
|
"epoch": 4.87,
|
|
"learning_rate": 9.390921474811409e-08,
|
|
"loss": 2.9799,
|
|
"step": 22540
|
|
},
|
|
{
|
|
"epoch": 4.87,
|
|
"learning_rate": 9.090613775648571e-08,
|
|
"loss": 3.0479,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 4.87,
|
|
"learning_rate": 8.79517737515545e-08,
|
|
"loss": 3.0182,
|
|
"step": 22560
|
|
},
|
|
{
|
|
"epoch": 4.87,
|
|
"learning_rate": 8.504612851096272e-08,
|
|
"loss": 3.0387,
|
|
"step": 22570
|
|
},
|
|
{
|
|
"epoch": 4.87,
|
|
"learning_rate": 8.218920771708439e-08,
|
|
"loss": 3.0355,
|
|
"step": 22580
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 7.938101695699473e-08,
|
|
"loss": 3.0373,
|
|
"step": 22590
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 7.662156172248136e-08,
|
|
"loss": 3.0462,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 7.39108474100192e-08,
|
|
"loss": 3.0091,
|
|
"step": 22610
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 7.124887932076496e-08,
|
|
"loss": 3.0797,
|
|
"step": 22620
|
|
},
|
|
{
|
|
"epoch": 4.89,
|
|
"learning_rate": 6.863566266054333e-08,
|
|
"loss": 3.0681,
|
|
"step": 22630
|
|
},
|
|
{
|
|
"epoch": 4.89,
|
|
"learning_rate": 6.607120253984134e-08,
|
|
"loss": 3.076,
|
|
"step": 22640
|
|
},
|
|
{
|
|
"epoch": 4.89,
|
|
"learning_rate": 6.355550397379451e-08,
|
|
"loss": 3.0132,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 4.89,
|
|
"learning_rate": 6.108857188218131e-08,
|
|
"loss": 3.0115,
|
|
"step": 22660
|
|
},
|
|
{
|
|
"epoch": 4.89,
|
|
"learning_rate": 5.867041108941207e-08,
|
|
"loss": 3.0621,
|
|
"step": 22670
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 5.630102632451228e-08,
|
|
"loss": 3.031,
|
|
"step": 22680
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 5.398042222112265e-08,
|
|
"loss": 3.022,
|
|
"step": 22690
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 5.1708603317490725e-08,
|
|
"loss": 3.0581,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 4.948557405645426e-08,
|
|
"loss": 3.0281,
|
|
"step": 22710
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"learning_rate": 4.731133878543847e-08,
|
|
"loss": 3.047,
|
|
"step": 22720
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"learning_rate": 4.518590175644211e-08,
|
|
"loss": 3.035,
|
|
"step": 22730
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"learning_rate": 4.310926712603469e-08,
|
|
"loss": 3.0415,
|
|
"step": 22740
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"learning_rate": 4.1081438955348215e-08,
|
|
"loss": 3.0313,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"learning_rate": 3.910242121006602e-08,
|
|
"loss": 3.0021,
|
|
"step": 22760
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 3.717221776041446e-08,
|
|
"loss": 3.0509,
|
|
"step": 22770
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 3.5290832381160155e-08,
|
|
"loss": 3.0877,
|
|
"step": 22780
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 3.3458268751593305e-08,
|
|
"loss": 3.047,
|
|
"step": 22790
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 3.167453045553603e-08,
|
|
"loss": 3.0126,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 2.993962098132297e-08,
|
|
"loss": 3.0296,
|
|
"step": 22810
|
|
},
|
|
{
|
|
"epoch": 4.93,
|
|
"learning_rate": 2.8253543721790122e-08,
|
|
"loss": 3.0058,
|
|
"step": 22820
|
|
},
|
|
{
|
|
"epoch": 4.93,
|
|
"learning_rate": 2.6616301974285994e-08,
|
|
"loss": 3.0456,
|
|
"step": 22830
|
|
},
|
|
{
|
|
"epoch": 4.93,
|
|
"learning_rate": 2.5027898940649386e-08,
|
|
"loss": 3.0723,
|
|
"step": 22840
|
|
},
|
|
{
|
|
"epoch": 4.93,
|
|
"learning_rate": 2.348833772721215e-08,
|
|
"loss": 3.0347,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 2.1997621344785335e-08,
|
|
"loss": 3.0497,
|
|
"step": 22860
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 2.055575270865917e-08,
|
|
"loss": 3.028,
|
|
"step": 22870
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 1.9162734638597524e-08,
|
|
"loss": 3.0774,
|
|
"step": 22880
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 1.7818569858826793e-08,
|
|
"loss": 3.0382,
|
|
"step": 22890
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 1.6523260998041467e-08,
|
|
"loss": 3.074,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 1.5276810589381907e-08,
|
|
"loss": 3.0258,
|
|
"step": 22910
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 1.4079221070448234e-08,
|
|
"loss": 2.9724,
|
|
"step": 22920
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 1.29304947832809e-08,
|
|
"loss": 3.0074,
|
|
"step": 22930
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 1.1830633974363459e-08,
|
|
"loss": 3.0574,
|
|
"step": 22940
|
|
},
|
|
{
|
|
"epoch": 4.95,
|
|
"learning_rate": 1.077964079462257e-08,
|
|
"loss": 3.0382,
|
|
"step": 22950
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 9.777517299408567e-09,
|
|
"loss": 3.0286,
|
|
"step": 22960
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 8.824265448503789e-09,
|
|
"loss": 2.9604,
|
|
"step": 22970
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 7.919887106119794e-09,
|
|
"loss": 3.0551,
|
|
"step": 22980
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 7.064384040886274e-09,
|
|
"loss": 2.9933,
|
|
"step": 22990
|
|
},
|
|
{
|
|
"epoch": 4.97,
|
|
"learning_rate": 6.257757925848262e-09,
|
|
"loss": 2.993,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 4.97,
|
|
"learning_rate": 5.500010338471695e-09,
|
|
"loss": 3.0509,
|
|
"step": 23010
|
|
},
|
|
{
|
|
"epoch": 4.97,
|
|
"learning_rate": 4.79114276062953e-09,
|
|
"loss": 3.0074,
|
|
"step": 23020
|
|
},
|
|
{
|
|
"epoch": 4.97,
|
|
"learning_rate": 4.131156578604523e-09,
|
|
"loss": 3.0275,
|
|
"step": 23030
|
|
},
|
|
{
|
|
"epoch": 4.97,
|
|
"learning_rate": 3.5200530830864496e-09,
|
|
"loss": 3.021,
|
|
"step": 23040
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 2.95783346916656e-09,
|
|
"loss": 3.031,
|
|
"step": 23050
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 2.4444988363431232e-09,
|
|
"loss": 3.0166,
|
|
"step": 23060
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 1.9800501885047782e-09,
|
|
"loss": 3.0235,
|
|
"step": 23070
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 1.564488433944411e-09,
|
|
"loss": 3.0617,
|
|
"step": 23080
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 1.1978143853424995e-09,
|
|
"loss": 3.024,
|
|
"step": 23090
|
|
},
|
|
{
|
|
"epoch": 4.99,
|
|
"learning_rate": 8.800287597837686e-10,
|
|
"loss": 3.0316,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 4.99,
|
|
"learning_rate": 6.111321787377611e-10,
|
|
"loss": 3.0242,
|
|
"step": 23110
|
|
},
|
|
{
|
|
"epoch": 4.99,
|
|
"learning_rate": 3.911251680643879e-10,
|
|
"loss": 3.0317,
|
|
"step": 23120
|
|
},
|
|
{
|
|
"epoch": 4.99,
|
|
"learning_rate": 2.200081580167046e-10,
|
|
"loss": 3.0298,
|
|
"step": 23130
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 9.778148324091074e-11,
|
|
"loss": 3.084,
|
|
"step": 23140
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 2.4445382762472435e-11,
|
|
"loss": 3.051,
|
|
"step": 23150
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.9821,
|
|
"step": 23160
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_loss": 3.0402331352233887,
|
|
"eval_runtime": 191.4444,
|
|
"eval_samples_per_second": 774.22,
|
|
"eval_steps_per_second": 24.195,
|
|
"step": 23160
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"step": 23160,
|
|
"total_flos": 8.495719075924673e+18,
|
|
"train_loss": 3.1306352193697333,
|
|
"train_runtime": 16128.3584,
|
|
"train_samples_per_second": 183.799,
|
|
"train_steps_per_second": 1.436
|
|
}
|
|
],
|
|
"max_steps": 23160,
|
|
"num_train_epochs": 5,
|
|
"total_flos": 8.495719075924673e+18,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|