diff --git a/tokenizer.json b/tokenizer.json index b656fc0..c5fdc76 100644 --- a/tokenizer.json +++ b/tokenizer.json @@ -1 +1,799 @@ -{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<|startoftext|>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":1,"special":true,"content":"<|endoftext|>","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"ByteLevel","add_prefix_space":true,"trim_offsets":false},"decoder":{"type":"ByteLevel","add_prefix_space":true,"trim_offsets":true},"model":{"type":"BPE","dropout":null,"unk_token":null,"continuing_subword_prefix":"","end_of_word_suffix":"","fuse_unk":false,"vocab":{"<|startoftext|>":0,"<|endoftext|>":1,"!":2,"\"":3,"#":4,"$":5,"%":6,"&":7,"'":8,"(":9,")":10,"*":11,"+":12,",":13,"-":14,".":15,"/":16,"0":17,"1":18,"2":19,"3":20,"4":21,"5":22,"6":23,"7":24,"8":25,"9":26,":":27,";":28,"<":29,"=":30,">":31,"?":32,"@":33,"A":34,"B":35,"C":36,"D":37,"E":38,"F":39,"G":40,"H":41,"I":42,"J":43,"K":44,"L":45,"M":46,"N":47,"O":48,"P":49,"Q":50,"R":51,"S":52,"T":53,"U":54,"V":55,"W":56,"X":57,"Y":58,"Z":59,"[":60,"\\":61,"]":62,"^":63,"_":64,"`":65,"a":66,"b":67,"c":68,"d":69,"e":70,"f":71,"g":72,"h":73,"i":74,"j":75,"k":76,"l":77,"m":78,"n":79,"o":80,"p":81,"q":82,"r":83,"s":84,"t":85,"u":86,"v":87,"w":88,"x":89,"y":90,"z":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"Â":125,"Ã":126,"Ä":127,"Å":128,"Æ":129,"Ç":130,"È":131,"É":132,"Ê":133,"Ë":134,"Ì":135,"Í":136,"Î":137,"Ï":138,"Ð":139,"Ñ":140,"Ö":141,"×":142,"Ø":143,"Ù":144,"Ü":145,"à":146,"á":147,"â":148,"ã":149,"ä":150,"å":151,"æ":152,"ç":153,"è":154,"é":155,"ë":156,"ì":157,"ï":158,"Ċ":159,"Ġ":160,"Ģ":161,"ģ":162,"Ĥ":163,"ĥ":164,"Ħ":165,"ħ":166,"Ĩ":167,"ĩ":168,"Ī":169,"ī":170,"Ĭ":171,"ĭ":172,"Į":173,"į":174,"İ":175,"ı":176,"IJ":177,"ij":178,"Ĵ":179,"ĵ":180,"Ķ":181,"ķ":182,"ĸ":183,"Ĺ":184,"ĺ":185,"Ļ":186,"ļ":187,"Ľ":188,"ľ":189,"Ŀ":190,"ŀ":191,"Ł":192,"ł":193,"Ń":194,"e":195,"d":196,"a":197,"o":198,"n":199,"±":200,"l":201,"m":202,"h":203,"r":204,"i":205,"s":206,"Z":207,"t":208,"f":209,"k":210,"y":211,"b":212,"F":213,"g":214,"7":215,"0":216,"p":217,"L":218,"H":219,"¡":220,"Ī":221,"1":222,"Ģ":223,"c":224,"ĩ":225,"6":226,"A":227,"z":228,"u":229,"S":230,"2":231,"v":232,"4":233,"M":234,"T":235,"8":236,"I":237,"N":238,"C":239,"5":240,"¹":241,"9":242,"3":243,"ī":244,"P":245,"E":246,"»":247,"V":248,"İ":249,"w":250,"J":251,"ł":252,".":253,"K":254,"D":255,"Ķ":256,"¸":257,"B":258,"©":259,"º":260,"µ":261,"Ĥ":262,"X":263,"R":264,"O":265,"«":266,"Ļ":267,"U":268,"x":269,"[":270,"¿":271,"³":272,"ģ":273,"W":274,"§":275,"-":276,"ĸ":277,"Ħ":278,",":279,"q":280,"ħ":281,"¨":282,"G":283,"²":284,"ĺ":285,"ª":286,"¯":287,"j":288,"]":289,"ļ":290,"Ŀ":291,"¤":292,"ŀ":293,"½":294,"IJ":295,"'":296,"Ń":297,"°":298,"ľ":299,">":300,"¶":301,"į":302,"¦":303,"|":304,"¼":305,"¢":306,"´":307,"Ĩ":308,"Q":309,"Y":310,"Ľ":311,"ĵ":312,"ij":313,"ķ":314,"Ĭ":315,"¾":316,";":317,"(":318,"¬":319,"@":320,"ĭ":321,"Ĺ":322,"£":323,"Į":324,"#":325,"·":326,"*":327,"Ĵ":328,"®":329,")":330,"^":331,"ı":332,"Ġ":333,"_":334,"Ł":335,"}":336,"ĥ":337,"\\":338,"¥":339,"<":340,"+":341,"=":342,"~":343,"\"":344,"!":345,"?":346,"`":347,"$":348,"Ċ":349,"/":350,"%":351,"&":352,":":353,"Ġt":354,"Ġth":355,"Ġa":356,"Ġthe":357,"in":358,"Ġo":359,"Ġ,":360,"Ġs":361,"ed":362,"Ġw":363,"er":364,"Ġ.":365,"Ġi":366,"re":367,"Ġc":368,"nd":369,"Ġf":370,"Ġb":371,"at":372,"Ġof":373,"er":374,"en":375,"ar":376,"or":377,"it":378,"Ġp":379,"Ġh":380,"Ġand":381,"on":382,"ing":383,"an":384,"ro":385,"Ġm":386,"Ġd":387,"es":388,"Ġin":389,"on":390,"Ġto":391,"ou":392,"is":393,"Ġa":394,"ic":395,"ĠT":396,"al":397,"Ġl":398,"Ġ=":399,"Ġre":400,"Ġ\"":401,"es":402,"ĠS":403,"as":404,"al":405,"il":406,"el":407,"ion":408,"ĠA":409,"ĠC":410,"Ġ1":411,"ĠĊ":412,"ur":413,"ĠTh":414,"Ġn":415,"as":416,"Ġ@":417,"ec":418,"om":419,"ac":420,"Ġe":421,"Ġwas":422,"ĠM":423,"or":424,"an":425,"am":426,"en":427,"ol":428,"Ġin":429,"Ġg":430,"Ġ'":431,"ĠB":432,"ly":433,"at":434,"iv":435,"ts":436,"ĠThe":437,"us":438,"-@":439,"Ġ@-@":440,"is":441,"ĠI":442,"Ġwh":443,"ig":444,"ĠH":445,"Ġst":446,"os":447,"un":448,"th":449,"ĠP":450,"Ġwit":451,"Ġthat":452,"ir":453,"Ġas":454,"em":455,"Ġon":456,"ra":457,"Ġfor":458,"ĠR":459,"et":460,"ow":461,"Ġ2":462,"id":463,"ĠD":464,"le":465,"Ġwith":466,"la":467,"ent":468,"im":469,"ĠF":470,"ea":471,"ion":472,"Ġby":473,"Ġ)":474,"Ġ(":475,"Ġal":476,"Ġcon":477,"ent":478,"ĠW":479,"Ġis":480,"ere":481,"ĠG":482,"ĠN":483,"ĠL":484,"Ġha":485,"ers":486,"ri":487,"th":488,"ted":489,"uc":490,"ĠJ":491,"Ġ19":492,"ev":493,"ul":494,"Ġv":495,"ce":496,"ation":497,"rom":498,"Ġbe":499,"ĠE":500,"in":501,"Ġthe":502,"Ġfrom":503,"ĠO":504,"ter":505,"Ġpro":506,"Ġar":507,"ad":508,"Ġcom":509,"ic":510,"ag":511,"Ġhis":512,"Ġsh":513,"Ġat":514,"ov":515,"ies":516,"oo":517,"pp":518,"st":519,"ch":520,"Ġr":521,"Ġ20":522,"ay":523,"if":524,"Ġwere":525,"Ġch":526,"ut":527,"st":528,"ut":529,"ds":530,"op":531,"um":532,"Ġit":533,"oc":534,"ter":535,"le":536,"igh":537,"ud":538,"Ġex":539,"ions":540,"ate":541,"ity":542,"ated":543,"Ġun":544,"ep":545,"qu":546,"Ġno":547,"ĠK":548,"ive":549,"ist":550,"Ġon":551,"ame":552,"oun":553,"ir":554,"ab":555,"Ġâ":556,"ing":557,"Ġhe":558,"ld":559,"ug":560,"ich":561,"Ġan":562,"ed":563,"Ġk":564,"ĠâĢ":565,"Ġhad":566,"ve":567,"ain":568,"Ġse":569,"tion":570,"ore":571,"res":572,"Ġwhich":573,"ĠIn":574,"od":575,"ther":576,"ak":577,"Ġsp":578,"ar":579,"Ġy":580,"ĠCh":581,"ong":582,"Ġac":583,"est":584,"ĠU":585,"ap":586,"ff":587,"ally":588,"rit":589,"ĠSt":590,"ub":591,"ge":592,"ber":593,"et":594,"Ġbe":595,"ear":596,"Ġrec":597,"ers":598,"Ġfir":599,"ot":600,"Ġare":601,"Ġan":602,"ch":603,"og":604,"ia":605,"est":606,"ine":607,"ill":608,"and":609,"el":610,"ary":611,"ew":612,"id":613,"Ġfor":614,"Ġ;":615,"Ġcomp":616,"ĠV":617,"Ġinc":618,"tr":619,"Ġ200":620,"Ġtheir":621,"us":622,"Ġbut":623,"ran":624,"ical":625,"Ġfirst":626,"Ġde":627,"Ġint":628,"Ġro":629,"so":630,"ĠâĢĵ":631,"Ġnot":632,"ding":633,"fter":634,"ure":635,"Ġpar":636,"Ġ:":637,"ian":638,"Ġtw":639,"ould":640,"Ġalso":641,"Ġits":642,"Ġwor":643,"um":644,"Ġor":645,"ost":646,"00":647,"our":648,"ard":649,"Ġres":650,"mp":651,"ue":652,"Ġab":653,"ish":654,"Ġcont":655,"Ġad":656,"own":657,"all":658,"oug":659,"Ġher":660,"ast":661,"Ġen":662,"ome":663,"all":664,"ded":665,"ow":666,"Ġhave":667,"Ġus":668,"ear":669,"ack":670,"duc":671,"ial":672,"ss":673,"ents":674,"ain":675,"ting":676,"Ġone":677,"ess":678,"Ġhas":679,"ight":680,"av":681,"Ġev":682,"out":683,"ay":684,"ence":685,"Ġbeen":686,"ew":687,"Ġtwo":688,"Ġcl":689,"der":690,"ime":691,"ks":692,"ess":693,"ish":694,".@":695,"Ġ@.@":696,"Ġpla":697,"Ġpl":698,"Ġor":699,"up":700,"ment":701,"uring":702,"oll":703,"ĠIn":704,"Ġthis":705,"Ġbec":706,"Ġcomm":707,"Ġdis":708,"ater":709,"age":710,"Ġapp":711,"ous":712,"ey":713,"il":714,"per":715,"ĠAl":716,"ional":717,"lud":718,"ely":719,"tt":720,"ile":721,"iz":722,"Ġj":723,"Ġwho":724,"Ġag":725,"ib":726,"Ġthey":727,"for":728,"Ġov":729,"ath":730,"eg":731,"Ġsc":732,"ip":733,"Ġ201":734,"Ġ3":735,"Ġper":736,"ory":737,"Ġdes":738,"ide":739,"Ġser":740,"se":741,"ĠHe":742,"land":743,"ations":744,"ric":745,"it":746,"res":747,"ered":748,"Ġpre":749,"ĠSh":750,"ance":751,"ort":752,"ant":753,",@":754,"Ġ@,@":755,"ell":756,"ĠY":757,"ned":758,"ell":759,"ite":760,"Ġinclud":761,"Ġrep":762,"Ġafter":763,"Ġsuc":764,"ree":765,"any":766,"im":767,"ort":768,"Ġ18":769,"Ġsu":770,"ade":771,"our":772,"ĠUn":773,"ĠIt":774,"ik":775,"ĠMar":776,"ember":777,"Ġ1":778,"een":779,"and":780,"Ġsec":781,"ice":782,"Ġtime":783,"ĠAn":784,"Ġinto":785,"Ġfin":786,"Ġother":787,"Ġatt":788,"ill":789,"ren":790,"ach":791,"ass":792,"eral":793,"ese":794,"sh":795,"als":796,"ition":797,"ough":798,"les":799,"amp":800,"Ġwould":801,"Ġmore":802,"roug":803,"rib":804,"ery":805,"ace":806,"ĠA":807,"Ġplay":808,"ited":809,"ked":810,"ist":811,"ied":812,"Ġ2":813,"ased":814,"ings":815,"ang":816,"am":817,"ip":818,"Ġbo":819,"able":820,"ty":821,"Ġchar":822,"Ġcent":823,"etw":824,"ates":825,"rop":826,"ĠI":827,"und":828,"ĠAm":829,"ces":830,"oin":831,"Ġinter":832,"up":833,"ct":834,"one":835,"Ġtra":836,"ant":837,"ect":838,"Ġall":839,"ef":840,"Ġcons":841,"ubl":842,"ning":843,"ans":844,"Ġfe":845,"ust":846,"Ġ0":847,"Ġrem":848,"ase":849,"ong":850,"Ġwhen":851,"eb":852,"ĠWh":853,"Ġear":854,"ever":855,"Ġover":856,"Ġkn":857,"aus":858,"Ġpos":859,"ad":860,"erm":861,"Ġshe":862,"Ġra":863,"Ġduring":864,"ason":865,"vi":866,"Ġexp":867,"Ġlea":868,"Ġel":869,"Ġ4":870,"Ġonly":871,"ond":872,"Ġdec":873,"Ġacc":874,"Ġoff":875,"iss":876,"Ġfl":877,"ĠEn":878,"ot":879,"ens":880,"ose":881,"ake":882,"om":883,"Ġsev":884,"ach":885,"etween":886,"ern":887,"Ġ3":888,"Ġpr":889,"Ġgro":890,"ruc":891,"Ġdi":892,"Ġ199":893,"ĠAr":894,"Ġgame":895,"Ġhim":896,"ook":897,"Ġup":898,"Ġabout":899,"Ġrel":900,"form":901,"Ġthree":902,"att":903,"ĠCom":904,"Ġsa":905,"ears":906,"Ġ5":907,"ry":908,"Ġimp":909,"Ġmost":910,"fer":911,"Ġpres":912,"Ġfil":913,"Ġbetween":914,"Ġbeg":915,"ph":916,"ors":917,"Ġthan":918,"Ġrecor":919,"ob":920,"eric":921,"ating":922,"Ġthroug":923,"king":924,"Ġout":925,"Ġnum":926,"ood":927,"ollow":928,"act":929,"uil":930,"Ġcre":931,"olog":932,"ational":933,"Ġproduc":934,"Ġwhile":935,"Ġlater":936,"Ġwrit":937,"ex":938,"Ġstar":939,"Ġspec":940,"ee":941,"ished":942,"Ġreg":943,"ision":944,"outh":945,"Ġrele":946,"Ġass":947,"Ġseason":948,"Ġmade":949,"ily":950,"ru":951,"oy":952,"tur":953,"te":954,"Ġqu":955,"Ġmov":956,"ury":957,"ĠAmeric":958,"ement":959,"cc":960,"ound":961,"Ġlar":962,"Ġform":963,"ect":964,"Ġdef":965,"Ġmus":966,"ĠPar":967,"Ġme":968,"Ġsub":969,"way":970,"op":971,"oh":972,"eld":973,"ie":974,"emp":975,"ames":976,"ern":977,"Ġnor":978,"ived":979,"evel":980,"Ġsuch":981,"ards":982,"Ġind":983,"ike":984,"Ġgen":985,"ert":986,"Ġyear":987,"Ġused":988,"Ġnew":989,"Ġ5":990,"Ġalb":991,"sp":992,"yp":993,"Ġwith":994,"Ġwhere":995,"ics":996,"ĠThis":997,"Ġthem":998,"wn":999},"merges":["Ġ t","Ġt h","Ġ a","Ġth e","i n","Ġ o","Ġ ,","Ġ s","e d","Ġ w","e r","Ġ .","Ġ i","r e","Ġ c","n d","Ġ f","Ġ b","a t","Ġo f","e r","e n","a r","o r","i t","Ġ p","Ġ h","Ġa nd","o n","in g","a n","r o","Ġ m","Ġ d","e s","Ġi n","o n","Ġt o","o u","i s","Ġ a","i c","Ġ T","a l","Ġ l","Ġ =","Ġ re","Ġ \"","e s","Ġ S","a s","a l","i l","e l","i on","Ġ A","Ġ C","Ġ 1","Ġ Ċ","u r","ĠT h","Ġ n","a s","Ġ @","e c","o m","a c","Ġ e","Ġw as","Ġ M","o r","a n","a m","e n","o l","Ġ in","Ġ g","Ġ '","Ġ B","l y","a t","i v","t s","ĠTh e","u s","- @","Ġ@ -@","i s","Ġ I","Ġw h","i g","Ġ H","Ġs t","o s","u n","t h","Ġ P","Ġw it","Ġth at","i r","Ġa s","e m","Ġo n","r a","Ġf or","Ġ R","e t","o w","Ġ 2","i d","Ġ D","l e","Ġwit h","l a","en t","i m","Ġ F","e a","i on","Ġb y","Ġ )","Ġ (","Ġa l","Ġc on","en t","Ġ W","Ġi s","er e","Ġ G","Ġ N","Ġ L","Ġh a","er s","r i","t h","t ed","u c","Ġ J","Ġ1 9","e v","u l","Ġ v","c e","at ion","ro m","Ġb e","Ġ E","i n","Ġth e","Ġf rom","Ġ O","t er","Ġp ro","Ġa r","a d","Ġc om","i c","a g","Ġh is","Ġs h","Ġa t","o v","i es","o o","p p","s t","c h","Ġ r","Ġ2 0","a y","i f","Ġw ere","Ġc h","u t","s t","u t","d s","o p","u m","Ġi t","o c","t er","l e","ig h","u d","Ġe x","ion s","at e","it y","at ed","Ġ un","e p","q u","Ġn o","Ġ K","iv e","is t","Ġo n","am e","ou n","i r","a b","Ġ â","in g","Ġh e","l d","u g","ic h","Ġa n","e d","Ġ k","Ġâ Ģ","Ġha d","v e","a in","Ġs e","t ion","or e","re s","Ġwh ich","ĠI n","o d","th er","a k","Ġs p","a r","Ġ y","ĠC h","on g","Ġa c","es t","Ġ U","a p","f f","al ly","r it","ĠS t","u b","g e","b er","e t","Ġb e","e ar","Ġre c","er s","Ġf ir","o t","Ġar e","Ġa n","c h","o g","i a","es t","in e","il l","an d","e l","ar y","e w","i d","Ġf or","Ġ ;","Ġcom p","Ġ V","Ġin c","t r","Ġ20 0","Ġthe ir","u s","Ġb ut","r an","ic al","Ġfir st","Ġd e","Ġin t","Ġ ro","s o","ĠâĢ ĵ","Ġno t","d ing","f ter","ur e","Ġp ar","Ġ :","i an","Ġt w","ou ld","Ġal so","Ġi ts","Ġw or","u m","Ġo r","os t","0 0","ou r","ar d","Ġre s","m p","u e","Ġa b","is h","Ġcon t","Ġa d","ow n","al l","ou g","Ġh er","as t","Ġ en","om e","al l","d ed","o w","Ġha ve","Ġ us","ea r","ac k","d uc","i al","s s","en ts","a in","t ing","Ġon e","es s","Ġh as","igh t","a v","Ġe v","ou t","a y","en ce","Ġbe en","e w","Ġtw o","Ġc l","d er","im e","k s","es s","is h",". @","Ġ@ .@","Ġp la","Ġp l","Ġo r","u p","m ent","ur ing","ol l","ĠI n","Ġth is","Ġb ec","Ġcom m","Ġd is","at er","ag e","Ġa pp","ou s","e y","i l","p er","ĠA l","ion al","l ud","el y","t t","il e","i z","Ġ j","Ġwh o","Ġa g","i b","Ġthe y","f or","Ġo v","at h","e g","Ġs c","i p","Ġ20 1","Ġ 3","Ġp er","or y","Ġd es","id e","Ġs er","s e","ĠH e","la nd","at ions","r ic","i t","re s","er ed","Ġp re","ĠS h","an ce","or t","an t",", @","Ġ@ ,@","el l","Ġ Y","n ed","el l","it e","Ġinc lud","Ġre p","Ġa fter","Ġs uc","re e","an y","i m","or t","Ġ1 8","Ġs u","ad e","ou r","ĠU n","ĠI t","i k","ĠM ar","em ber","Ġ 1","e en","a nd","Ġs ec","ic e","Ġt ime","ĠA n","Ġint o","Ġf in","Ġo ther","Ġa tt","il l","re n","ac h","as s","er al","es e","s h","al s","it ion","oug h","l es","am p","Ġw ould","Ġm ore","ro ug","ri b","er y","ac e","Ġ A","Ġpla y","it ed","k ed","is t","i ed","Ġ 2","as ed","ing s","an g","a m","i p","Ġb o","ab le","t y","Ġch ar","Ġc ent","et w","at es","ro p","Ġ I","u nd","ĠA m","c es","o in","Ġin ter","u p","c t","on e","Ġt ra","an t","ec t","Ġal l","e f","Ġcon s","ub l","n ing","an s","Ġf e","us t","Ġ 0","Ġre m","as e","on g","Ġwh en","e b","ĠW h","Ġe ar","ev er","Ġov er","Ġk n","a us","Ġp os","a d","er m","Ġsh e","Ġ ra","Ġd uring","as on","v i","Ġex p","Ġl ea","Ġ el","Ġ 4","Ġon ly","o nd","Ġd ec","Ġac c","Ġo ff","is s","Ġf l","ĠE n","o t","en s","os e","ak e","o m","Ġs ev","ac h","etw een","er n","Ġ 3","Ġp r","Ġg ro","r uc","Ġd i","Ġ19 9","ĠA r","Ġg ame","Ġh im","oo k","Ġ up","Ġab out","Ġre l","for m","Ġth ree","at t","ĠC om","Ġs a","ear s","Ġ 5","r y","Ġi mp","Ġm ost","f er","Ġp res","Ġf il","Ġb etween","Ġbe g","p h","or s","Ġth an","Ġrec or","o b","er ic","at ing","Ġth roug","k ing","Ġo ut","Ġn um","oo d","oll ow","ac t","u il","Ġc re","ol og","at ional","Ġpro duc","Ġwh ile","Ġl ater","Ġw rit","e x","Ġst ar","Ġsp ec","e e","ish ed","Ġre g","is ion","ou th","Ġre le","Ġa ss","Ġse ason","Ġm ade","il y","r u","o y","t ur","t e","Ġ qu","Ġm ov","ur y","ĠAm eric","em ent","c c","ou nd","Ġl ar","Ġfor m","ec t","Ġde f","Ġm us","ĠP ar","Ġm e","Ġs ub","w ay","o p","o h","el d","i e","em p","am es","er n","Ġn or","iv ed","ev el","Ġsuc h","ar ds","Ġin d","ik e","Ġg en","er t","Ġy ear","Ġus ed","Ġn ew","Ġ 5","Ġal b","s p","y p","Ġwit h","Ġwh ere","ic s","ĠTh is","Ġthe m","w n"]}} \ No newline at end of file +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "<|startoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true + }, + { + "id": 1, + "special": true, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|startoftext|>": 0, + "<|endoftext|>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99 + }, + "merges": [ + "Ġ t", + "Ġt h", + "Ġ a", + "Ġth e", + "i n", + "Ġ o", + "Ġ ,", + "Ġ s", + "e d", + "Ġ w", + "e r", + "Ġ .", + "Ġ i", + "r e", + "Ġ c", + "n d", + "Ġ f", + "Ġ b", + "a t", + "Ġo f", + "e r", + "e n", + "a r", + "o r", + "i t", + "Ġ p", + "Ġ h", + "Ġa nd", + "o n", + "in g", + "a n", + "r o", + "Ġ m", + "Ġ d", + "e s", + "Ġi n", + "o n", + "Ġt o", + "o u", + "i s", + "Ġ a", + "i c", + "Ġ T", + "a l", + "Ġ l", + "Ġ =", + "Ġ re", + "Ġ \"", + "e s", + "Ġ S", + "a s", + "a l", + "i l", + "e l", + "i on", + "Ġ A", + "Ġ C", + "Ġ 1", + "Ġ Ċ", + "u r", + "ĠT h", + "Ġ n", + "a s", + "Ġ @", + "e c", + "o m", + "a c", + "Ġ e", + "Ġw as", + "Ġ M", + "o r", + "a n", + "a m", + "e n", + "o l", + "Ġ in", + "Ġ g", + "Ġ '", + "Ġ B", + "l y", + "a t", + "i v", + "t s", + "ĠTh e", + "u s", + "- @", + "Ġ@ -@", + "i s", + "Ġ I", + "Ġw h", + "i g", + "Ġ H", + "Ġs t", + "o s", + "u n", + "t h", + "Ġ P", + "Ġw it", + "Ġth at", + "i r", + "Ġa s", + "e m", + "Ġo n", + "r a", + "Ġf or", + "Ġ R", + "e t", + "o w", + "Ġ 2", + "i d", + "Ġ D", + "l e", + "Ġwit h", + "l a", + "en t", + "i m", + "Ġ F", + "e a", + "i on", + "Ġb y", + "Ġ )", + "Ġ (", + "Ġa l", + "Ġc on", + "en t", + "Ġ W", + "Ġi s", + "er e", + "Ġ G", + "Ġ N", + "Ġ L", + "Ġh a", + "er s", + "r i", + "t h", + "t ed", + "u c", + "Ġ J", + "Ġ1 9", + "e v", + "u l", + "Ġ v", + "c e", + "at ion", + "ro m", + "Ġb e", + "Ġ E", + "i n", + "Ġth e", + "Ġf rom", + "Ġ O", + "t er", + "Ġp ro", + "Ġa r", + "a d", + "Ġc om", + "i c", + "a g", + "Ġh is", + "Ġs h", + "Ġa t", + "o v", + "i es", + "o o", + "p p", + "s t", + "c h", + "Ġ r", + "Ġ2 0", + "a y", + "i f", + "Ġw ere", + "Ġc h", + "u t", + "s t", + "u t", + "d s", + "o p", + "u m", + "Ġi t", + "o c", + "t er", + "l e", + "ig h", + "u d", + "Ġe x", + "ion s", + "at e", + "it y", + "at ed", + "Ġ un", + "e p", + "q u", + "Ġn o", + "Ġ K", + "iv e", + "is t", + "Ġo n", + "am e", + "ou n", + "i r", + "a b", + "Ġ â", + "in g", + "Ġh e", + "l d", + "u g", + "ic h", + "Ġa n", + "e d", + "Ġ k", + "Ġâ Ģ", + "Ġha d", + "v e", + "a in", + "Ġs e", + "t ion", + "or e", + "re s", + "Ġwh ich", + "ĠI n", + "o d", + "th er", + "a k", + "Ġs p", + "a r", + "Ġ y", + "ĠC h", + "on g", + "Ġa c", + "es t", + "Ġ U", + "a p", + "f f", + "al ly", + "r it", + "ĠS t", + "u b", + "g e", + "b er", + "e t", + "Ġb e", + "e ar", + "Ġre c", + "er s", + "Ġf ir", + "o t", + "Ġar e", + "Ġa n", + "c h", + "o g", + "i a", + "es t", + "in e", + "il l", + "an d", + "e l", + "ar y", + "e w", + "i d", + "Ġf or", + "Ġ ;", + "Ġcom p", + "Ġ V", + "Ġin c", + "t r", + "Ġ20 0", + "Ġthe ir", + "u s", + "Ġb ut", + "r an", + "ic al", + "Ġfir st", + "Ġd e", + "Ġin t", + "Ġ ro", + "s o", + "ĠâĢ ĵ", + "Ġno t", + "d ing", + "f ter", + "ur e", + "Ġp ar", + "Ġ :", + "i an", + "Ġt w", + "ou ld", + "Ġal so", + "Ġi ts", + "Ġw or", + "u m", + "Ġo r", + "os t", + "0 0", + "ou r", + "ar d", + "Ġre s", + "m p", + "u e", + "Ġa b", + "is h", + "Ġcon t", + "Ġa d", + "ow n", + "al l", + "ou g", + "Ġh er", + "as t", + "Ġ en", + "om e", + "al l", + "d ed", + "o w", + "Ġha ve", + "Ġ us", + "ea r", + "ac k", + "d uc", + "i al", + "s s", + "en ts", + "a in", + "t ing", + "Ġon e", + "es s", + "Ġh as", + "igh t", + "a v", + "Ġe v", + "ou t", + "a y", + "en ce", + "Ġbe en", + "e w", + "Ġtw o", + "Ġc l", + "d er", + "im e", + "k s", + "es s", + "is h", + ". @", + "Ġ@ .@", + "Ġp la", + "Ġp l", + "Ġo r", + "u p", + "m ent", + "ur ing", + "ol l", + "ĠI n", + "Ġth is", + "Ġb ec", + "Ġcom m", + "Ġd is", + "at er", + "ag e", + "Ġa pp", + "ou s", + "e y", + "i l", + "p er", + "ĠA l", + "ion al", + "l ud", + "el y", + "t t", + "il e", + "i z", + "Ġ j", + "Ġwh o", + "Ġa g", + "i b", + "Ġthe y", + "f or", + "Ġo v", + "at h", + "e g", + "Ġs c", + "i p", + "Ġ20 1", + "Ġ 3", + "Ġp er", + "or y", + "Ġd es", + "id e", + "Ġs er", + "s e", + "ĠH e", + "la nd", + "at ions", + "r ic", + "i t", + "re s", + "er ed", + "Ġp re", + "ĠS h", + "an ce", + "or t", + "an t", + ", @", + "Ġ@ ,@", + "el l", + "Ġ Y", + "n ed", + "el l", + "it e", + "Ġinc lud", + "Ġre p", + "Ġa fter", + "Ġs uc", + "re e", + "an y", + "i m", + "or t", + "Ġ1 8", + "Ġs u", + "ad e", + "ou r", + "ĠU n", + "ĠI t", + "i k", + "ĠM ar", + "em ber", + "Ġ 1", + "e en", + "a nd", + "Ġs ec", + "ic e", + "Ġt ime", + "ĠA n", + "Ġint o", + "Ġf in", + "Ġo ther", + "Ġa tt", + "il l", + "re n", + "ac h", + "as s", + "er al", + "es e", + "s h", + "al s", + "it ion", + "oug h", + "l es", + "am p", + "Ġw ould", + "Ġm ore", + "ro ug", + "ri b", + "er y", + "ac e", + "Ġ A", + "Ġpla y", + "it ed", + "k ed", + "is t", + "i ed", + "Ġ 2", + "as ed", + "ing s", + "an g", + "a m", + "i p", + "Ġb o", + "ab le", + "t y", + "Ġch ar", + "Ġc ent", + "et w", + "at es", + "ro p", + "Ġ I", + "u nd", + "ĠA m", + "c es", + "o in", + "Ġin ter", + "u p", + "c t", + "on e", + "Ġt ra", + "an t", + "ec t", + "Ġal l", + "e f", + "Ġcon s", + "ub l", + "n ing", + "an s", + "Ġf e", + "us t", + "Ġ 0", + "Ġre m", + "as e", + "on g", + "Ġwh en", + "e b", + "ĠW h", + "Ġe ar", + "ev er", + "Ġov er", + "Ġk n", + "a us", + "Ġp os", + "a d", + "er m", + "Ġsh e", + "Ġ ra", + "Ġd uring", + "as on", + "v i", + "Ġex p", + "Ġl ea", + "Ġ el", + "Ġ 4", + "Ġon ly", + "o nd", + "Ġd ec", + "Ġac c", + "Ġo ff", + "is s", + "Ġf l", + "ĠE n", + "o t", + "en s", + "os e", + "ak e", + "o m", + "Ġs ev", + "ac h", + "etw een", + "er n", + "Ġ 3", + "Ġp r", + "Ġg ro", + "r uc", + "Ġd i", + "Ġ19 9", + "ĠA r", + "Ġg ame", + "Ġh im", + "oo k", + "Ġ up", + "Ġab out", + "Ġre l", + "for m", + "Ġth ree", + "at t", + "ĠC om", + "Ġs a", + "ear s", + "Ġ 5", + "r y", + "Ġi mp", + "Ġm ost", + "f er", + "Ġp res", + "Ġf il", + "Ġb etween", + "Ġbe g", + "p h", + "or s", + "Ġth an", + "Ġrec or", + "o b", + "er ic", + "at ing", + "Ġth roug", + "k ing", + "Ġo ut", + "Ġn um", + "oo d", + "oll ow", + "ac t", + "u il", + "Ġc re", + "ol og", + "at ional", + "Ġpro duc", + "Ġwh ile", + "Ġl ater", + "Ġw rit", + "e x", + "Ġst ar", + "Ġsp ec", + "e e", + "ish ed", + "Ġre g", + "is ion", + "ou th", + "Ġre le", + "Ġa ss", + "Ġse ason", + "Ġm ade", + "il y", + "r u", + "o y", + "t ur", + "t e", + "Ġ qu", + "Ġm ov", + "ur y", + "ĠAm eric", + "em ent", + "c c", + "ou nd", + "Ġl ar", + "Ġfor m", + "ec t", + "Ġde f", + "Ġm us", + "ĠP ar", + "Ġm e", + "Ġs ub", + "w ay", + "o p", + "o h", + "el d", + "i e", + "em p", + "am es", + "er n", + "Ġn or", + "iv ed", + "ev el", + "Ġsuc h", + "ar ds", + "Ġin d", + "ik e", + "Ġg en", + "er t", + "Ġy ear", + "Ġus ed", + "Ġn ew", + "Ġ 5", + "Ġal b", + "s p", + "y p", + "Ġwit h", + "Ġwh ere", + "ic s", + "ĠTh is", + "Ġthe m", + "w n" + ] + } +}