add tokenizer

This commit is contained in:
aicloud 2023-06-26 19:24:28 +08:00
parent 00918d99e6
commit 343f2cb7b3
3 changed files with 13 additions and 0 deletions

6
special_tokens_map.json Normal file
View File

@@ -0,0 +1,6 @@
{
"bos_token": "<s>",
"eos_token": "</s>",
"unk_token": "<unk>",
"pad_token": "[PAD]"
}

BIN
tokenizer.model Normal file

Binary file not shown.

7
tokenizer_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"bos_token": "<s>",
"eos_token": "</s>",
"model_max_length": 1000000000000000019884624838656,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>"
}