add tokenizer

This commit is contained in:
Moritz Laurer 2021-12-05 16:33:24 +00:00
parent 899fa3ee80
commit aa728f3a52
4 changed files with 6 additions and 0 deletions

1
added_tokens.json Normal file
View File

@@ -0,0 +1 @@
{"[MASK]": 250101}

1
special_tokens_map.json Normal file
View File

@@ -0,0 +1 @@
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

BIN
spm.model (Stored with Git LFS) Normal file

Binary file not shown.

1
tokenizer_config.json Normal file
View File

@@ -0,0 +1 @@
{"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "vocab_type": "spm", "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./results/nli-few-shot/xnli/mDeBERTa-v3-base/mDeBERTa-v3-base-mnli-xnli", "tokenizer_class": "DebertaV2Tokenizer"}