From 8e72aa603858f8c5952f14488cab39334b56e858 Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Tue, 27 Sep 2022 09:55:50 +0000 Subject: [PATCH] Upload tokenizer --- special_tokens_map.json | 9 ++++++++- tokenizer_config.json | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/special_tokens_map.json b/special_tokens_map.json index 6339b4c..479a05b 100644 --- a/special_tokens_map.json +++ b/special_tokens_map.json @@ -108,5 +108,12 @@ "<|notimestamps|>" ], "bos_token": "<|endoftext|>", - "eos_token": "<|endoftext|>" + "eos_token": "<|endoftext|>", + "unk_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } } diff --git a/tokenizer_config.json b/tokenizer_config.json index 18f1887..8c171b1 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -19,7 +19,7 @@ }, "errors": "replace", "model_max_length": 1024, - "name_or_path": "/home/arthur_huggingface_co/whisper/_whisper/multi-hf", + "name_or_path": "openai/whisper-tiny", "pad_token": null, "processor_class": "WhisperProcessor", "special_tokens_map_file": null,