From a7272d4c9364d026ed77840863900ccfe8703a1b Mon Sep 17 00:00:00 2001
From: duzx16
Date: Sat, 8 Apr 2023 12:07:06 +0800
Subject: [PATCH] Fix logit processor Fix tokenizer config saving

---
 modeling_chatglm.py     |  2 +-
 tokenization_chatglm.py | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/modeling_chatglm.py b/modeling_chatglm.py
index f431ff5..3edaeb1 100644
--- a/modeling_chatglm.py
+++ b/modeling_chatglm.py
@@ -56,7 +56,7 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         if torch.isnan(scores).any() or torch.isinf(scores).any():
             scores.zero_()
-            scores[..., 20005] = 5e4
+            scores[..., 5] = 5e4
         return scores
 
 
diff --git a/tokenization_chatglm.py b/tokenization_chatglm.py
index 39aaa19..126b1c2 100644
--- a/tokenization_chatglm.py
+++ b/tokenization_chatglm.py
@@ -170,9 +170,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
             vocab_file,
             do_lower_case=False,
             remove_space=False,
-            bos_token='sop',
-            eos_token='eos',
-            eop_token='eop',
+            bos_token='',
+            eos_token='',
+            eop_token='',
             mask_token='[MASK]',
             gmask_token='[gMASK]',
             padding_side="left",
@@ -183,6 +183,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
             do_lower_case=do_lower_case,
             remove_space=remove_space,
             padding_side=padding_side,
+            bos_token=bos_token,
+            eos_token=eos_token,
+            eop_token=eop_token,
+            mask_token=mask_token,
+            gmask_token=gmask_token,
+            num_image_tokens=num_image_tokens,
             **kwargs
         )
 