Upload tokenizer (#26)

- Upload tokenizer (33482313ea52a0bc9ee1303ac23d3f2d36a90932)
Author: Arthur Zucker (2023-02-23 17:55:25 +00:00, committed by system)
Parent: 5ff64998d3
Commit: 5c6a0f32a1
6 changed files with 101346 additions and 50264 deletions


@@ -17,7 +17,6 @@
   "<|da|>": 50285,
   "<|de|>": 50261,
   "<|el|>": 50281,
-  "<|endoftext|>": 50257,
   "<|en|>": 50259,
   "<|es|>": 50262,
   "<|et|>": 50307,

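The hunk above drops "<|endoftext|>" (id 50257) from the added-tokens map; the token is instead declared as the pad/unk special token elsewhere in this commit, so lookups still resolve to the same id. A minimal verification sketch in Python, assuming the `transformers` package and the checkpoint id "openai/whisper-large" (taken from the name_or_path entry removed further down in this diff):

    from transformers import WhisperTokenizer

    tok = WhisperTokenizer.from_pretrained("openai/whisper-large")

    # <|endoftext|> is no longer listed as an added token, but it is still a
    # registered special token, so it keeps its historical id.
    print(tok.convert_tokens_to_ids("<|endoftext|>"))  # expected: 50257
    print(tok.eos_token)                                # "<|endoftext|>"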

@@ -1,4 +1,4 @@
-#version: 0.2
+#version: 0.2 - Trained by `huggingface/tokenizers`
 Ġ a
 Ġt h
 i n

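This hunk only extends the version header of the merges file; each subsequent line ("Ġ a", "Ġt h", ...) is one BPE merge rule, listed highest priority first. A sketch of how this file pair is consumed, assuming the `tokenizers` package (file paths illustrative):

    from tokenizers import Tokenizer
    from tokenizers.models import BPE

    # The "#version: ..." line is treated as a header, not a merge rule;
    # every following line is a space-separated pair to merge, in priority order.
    model = BPE.from_file("vocab.json", "merges.txt")
    tokenizer = Tokenizer(model)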

@@ -124,7 +124,7 @@
   },
   "pad_token": "<|endoftext|>",
   "unk_token": {
-    "content": "",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,

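Here the unk token's content is fixed from an empty string to "<|endoftext|>", so unk and pad now alias the same special token, the usual GPT-2-style convention. A quick check, again assuming `transformers` and the "openai/whisper-large" checkpoint:

    from transformers import WhisperTokenizer

    tok = WhisperTokenizer.from_pretrained("openai/whisper-large")
    assert tok.unk_token == "<|endoftext|>"  # was "" before this commit
    assert tok.pad_token == "<|endoftext|>"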
tokenizer.json (new file, 101342 lines)

File diff suppressed because it is too large.

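The new tokenizer.json bundles the vocabulary, merges, and pre/post-processing into the single serialized format used by the Rust-backed "fast" tokenizer, which is why it is so large. A loading sketch, assuming the `tokenizers` package:

    from tokenizers import Tokenizer

    # Loads the complete pipeline (normalizer, pre-tokenizer, BPE model,
    # post-processor) from the single JSON file.
    fast = Tokenizer.from_file("tokenizer.json")
    enc = fast.encode("hello world")
    print(enc.tokens, enc.ids)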

@@ -19,7 +19,6 @@
   },
   "errors": "replace",
   "model_max_length": 1024,
-  "name_or_path": "openai/whisper-large",
   "pad_token": null,
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
@@ -27,7 +26,7 @@
   "tokenizer_class": "WhisperTokenizer",
   "unk_token": {
     "__type": "AddedToken",
-    "content": "",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,

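The first hunk removes the hard-coded name_or_path (so the config no longer pins the "openai/whisper-large" hub id); the second applies the same empty-string to "<|endoftext|>" unk fix to the slow tokenizer's config. The "__type": "AddedToken" wrapper deserializes to an AddedToken object; a sketch of the equivalent construction, assuming the `tokenizers` package:

    from tokenizers import AddedToken

    # Mirrors the JSON above: content plus stripping/normalization flags.
    unk = AddedToken("<|endoftext|>", lstrip=False, rstrip=False,
                     normalized=True)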
vocab.json (50260 changed lines)

File diff suppressed because one or more lines are too long.
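vocab.json itself is a flat token-to-id JSON map, which is why the diff viewer gives up on its very long lines. A sketch for inspecting it directly (path illustrative):

    import json

    with open("vocab.json", encoding="utf-8") as f:
        vocab = json.load(f)

    print(len(vocab))           # number of base-vocabulary entries
    print(max(vocab.values()))  # highest id in the base vocabulary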