Always add gmask in token ids

This commit is contained in:
duzx16 2023-04-14 15:54:32 +08:00
parent 53f019758b
commit 3a99d7951d
1 changed file with 3 additions and 14 deletions

View File

@@ -326,22 +326,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
Returns: Returns:
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens. `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
""" """
mask_ids = self.sp_tokenizer[self.mask_token] gmask_id = self.sp_tokenizer[self.gmask_token]
gmask_ids = self.sp_tokenizer[self.gmask_token]
eos_id = self.sp_tokenizer[self.eos_token] eos_id = self.sp_tokenizer[self.eos_token]
if mask_ids not in token_ids_0 and gmask_ids not in token_ids_0: token_ids_0 = token_ids_0 + [gmask_id, self.sp_tokenizer[self.bos_token]]
token_ids_0 += [gmask_ids]
if token_ids_0[-1] != mask_ids and token_ids_0[-1] != gmask_ids:
token_ids_0 += [self.sp_tokenizer[self.end_token]]
token_ids_0 += [self.sp_tokenizer[self.bos_token]]
if token_ids_1 is not None: if token_ids_1 is not None:
if not token_ids_1 or token_ids_1[-1] != eos_id: token_ids_0 = token_ids_0 + token_ids_1 + [eos_id]
token_ids_1 += [eos_id]
token_ids_0 += token_ids_1
return token_ids_0 return token_ids_0
def _pad( def _pad(