distilbert-base-cased/tokenizer.json

1 line
426 KiB
JSON
Raw Permalink Normal View History

2020-10-12 20:56:41 +08:00
{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[PAD]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":100,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":101,"special":true,"content":"[CLS]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":102,"special":true,"content":"[SEP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":103,"special":true,"content":"[MASK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":{"type":"BertNormalizer","clean_text":true,"handle_chinese_chars":true,"strip_accents":null,"lowercase":false},"pre_tokenizer":{"type":"BertPreTokenizer"},"post_processor":{"type":"TemplateProcessing","single":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}}],"pair":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}},{"Sequence":{"id":"B","type_id":1}},{"SpecialToken":{"id":"[SEP]","type_id":1}}],"special_tokens":{"[SEP]":{"id":"[SEP]","ids":[102],"tokens":["[SEP]"]},"[CLS]":{"id":"[CLS]","ids":[101],"tokens":["[CLS]"]}}},"decoder":{"type":"WordPiece","prefix":"##","cleanup":true},"model":{"unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"[PAD]":0,"[unused1]":1,"[unused2]":2,"[unused3]":3,"[unused4]":4,"[unused5]":5,"[unused6]":6,"[unused7]":7,"[unused8]":8,"[unused9]":9,"[unused10]":10,"[unused11]":11,"[unused12]":12,"[unused13]":13,"[unused14]":14,"[unused15]":15,"[unused16]":16,"[unused17]":17,"[unused18]":18,"[unused19]":19,"[unused20]":20,"[unused21]":21,"[unused22]":22,"[unused23]":23,"[unused24]":24,"[unused25]":25,"[unused26]":26,"[unused27]":27,"[unused28]":28,"[unused29]":29,"[unused30]":30,"[unused31]":31,"[unused32]":32,"[unused33]":33,"[unused34]":34,"[unused35]":35,"[unused36]":36,"[unused37]":37,"[unused38]":38,"[unused39]":39,"[unused40]":40,"[unused41]":41,"[unused42]":42,"[unused43]":43,"[unused44]":44,"[unused45]":45,"[unused46]":46,"[unused47]":47,"[unused48]":48,"[unused49]":49,"[unused50]":50,"[unused51]":51,"[unused52]":52,"[unused53]":53,"[unused54]":54,"[unused55]":55,"[unused56]":56,"[unused57]":57,"[unused58]":58,"[unused59]":59,"[unused60]":60,"[unused61]":61,"[unused62]":62,"[unused63]":63,"[unused64]":64,"[unused65]":65,"[unused66]":66,"[unused67]":67,"[unused68]":68,"[unused69]":69,"[unused70]":70,"[unused71]":71,"[unused72]":72,"[unused73]":73,"[unused74]":74,"[unused75]":75,"[unused76]":76,"[unused77]":77,"[unused78]":78,"[unused79]":79,"[unused80]":80,"[unused81]":81,"[unused82]":82,"[unused83]":83,"[unused84]":84,"[unused85]":85,"[unused86]":86,"[unused87]":87,"[unused88]":88,"[unused89]":89,"[unused90]":90,"[unused91]":91,"[unused92]":92,"[unused93]":93,"[unused94]":94,"[unused95]":95,"[unused96]":96,"[unused97]":97,"[unused98]":98,"[unused99]":99,"[UNK]":100,"[CLS]":101,"[SEP]":102,"[MASK]":103,"[unused100]":104,"[unused101]":105,"!":106,"\"":107,"#":108,"$":109,"%":110,"&":111,"'":112,"(":113,")":114,"*":115,"+":116,",":117,"-":118,".":119,"/":120,"0":121,"1":122,"2":123,"3":124,"4":125,"5":126,"6":127,"7":128,"8":129,"9":130,":":131,";":132,"<":133,"=":134,">":135,"?":136,"@":137,"A":138,"B":139,"C":140,"D":141,"E":142,"F":143,"G":144,"H":145,"I":146,"J":147,"K":148,"L":149,"M":150,"N":151,"O":152,"P":153,"Q":154,"R":155,"S":156,"T":157,"U":158,"V":159,"W":160,"X":161,"Y":162,"Z":163,"[":164,"\\":165,"]":166,"^":167,"_":168,"`":169,"a":170,"b":171,"c":172,"d":173,"e":174,"f":175,"g":176,"h":177,"i":178,"j":179,"k":180,"l":181,"m":182,"n":183,"o":184,"p":185,"q":186,"r":187,"s":188,"t":189,"u":190,"v":191,"w":192,"x":193,"y":194,"z":195,"{":196,"|":197,"}":198,"~":199,"¡":200,"¢":201,"£":202,"¥":203,"§":204,"¨":205,"©":206,"ª":207,"«":208,"¬":209,"®":210,"°":211,"±":212,"²":213,"³":214,"´":215,"µ":216,"¶":217,"·":218,"¹":219,"º":220,"»":221