Upload tokenizer (#26)
- Upload tokenizer (33482313ea52a0bc9ee1303ac23d3f2d36a90932)
This commit is contained in:
parent
5ff64998d3
commit
5c6a0f32a1
|
@ -17,7 +17,6 @@
|
||||||
"<|da|>": 50285,
|
"<|da|>": 50285,
|
||||||
"<|de|>": 50261,
|
"<|de|>": 50261,
|
||||||
"<|el|>": 50281,
|
"<|el|>": 50281,
|
||||||
"<|endoftext|>": 50257,
|
|
||||||
"<|en|>": 50259,
|
"<|en|>": 50259,
|
||||||
"<|es|>": 50262,
|
"<|es|>": 50262,
|
||||||
"<|et|>": 50307,
|
"<|et|>": 50307,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#version: 0.2
|
#version: 0.2 - Trained by `huggingface/tokenizers`
|
||||||
Ġ a
|
Ġ a
|
||||||
Ġt h
|
Ġt h
|
||||||
i n
|
i n
|
||||||
|
|
|
@ -124,7 +124,7 @@
|
||||||
},
|
},
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
"unk_token": {
|
"unk_token": {
|
||||||
"content": "",
|
"content": "<|endoftext|>",
|
||||||
"lstrip": false,
|
"lstrip": false,
|
||||||
"normalized": true,
|
"normalized": true,
|
||||||
"rstrip": false,
|
"rstrip": false,
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -19,7 +19,6 @@
|
||||||
},
|
},
|
||||||
"errors": "replace",
|
"errors": "replace",
|
||||||
"model_max_length": 1024,
|
"model_max_length": 1024,
|
||||||
"name_or_path": "openai/whisper-large",
|
|
||||||
"pad_token": null,
|
"pad_token": null,
|
||||||
"processor_class": "WhisperProcessor",
|
"processor_class": "WhisperProcessor",
|
||||||
"return_attention_mask": false,
|
"return_attention_mask": false,
|
||||||
|
@ -27,7 +26,7 @@
|
||||||
"tokenizer_class": "WhisperTokenizer",
|
"tokenizer_class": "WhisperTokenizer",
|
||||||
"unk_token": {
|
"unk_token": {
|
||||||
"__type": "AddedToken",
|
"__type": "AddedToken",
|
||||||
"content": "",
|
"content": "<|endoftext|>",
|
||||||
"lstrip": false,
|
"lstrip": false,
|
||||||
"normalized": true,
|
"normalized": true,
|
||||||
"rstrip": false,
|
"rstrip": false,
|
||||||
|
|
50260
vocab.json
50260
vocab.json
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue