Update tokenizer

2023-03-14 01:32:34 +08:00 · 2023-03-14 01:32:34 +08:00 · c4575e73d0
parent fc937144cc
commit c4575e73d0
1 changed file with 1 addition and 3 deletions
--- a/tokenization_chatglm.py
+++ b/tokenization_chatglm.py
@@ -15,8 +15,6 @@ from transformers.utils import logging

 logger = logging.get_logger(__name__)

-VOCAB_FILES_NAMES = {"vocab_file": "ice_text.model"}
-
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
    "THUDM/chatglm-6b": 2048,
 }
@@ -179,7 +177,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            Path to the vocabulary file.
    """

-    vocab_files_names = VOCAB_FILES_NAMES
+    vocab_files_names = {"vocab_file": "ice_text.model"}
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids"]