Fix attention_mask and position_ids
This commit is contained in:
parent
e22cddf212
commit
373fd6b9d4
|
@ -340,7 +340,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
||||||
token_ids_0 += [self.sp_tokenizer[self.bos_token]]
|
token_ids_0 += [self.sp_tokenizer[self.bos_token]]
|
||||||
|
|
||||||
if token_ids_1 is not None:
|
if token_ids_1 is not None:
|
||||||
if token_ids_1[-1] != eop_id:
|
if not token_ids_1 or token_ids_1[-1] != eop_id:
|
||||||
token_ids_1 += [eop_id]
|
token_ids_1 += [eop_id]
|
||||||
token_ids_0 += token_ids_1
|
token_ids_0 += token_ids_1
|
||||||
|
|
||||||
|
@ -397,7 +397,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
||||||
needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
|
needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
|
||||||
|
|
||||||
# Initialize attention mask if not present.
|
# Initialize attention mask if not present.
|
||||||
if return_attention_mask:
|
if max_length is not None:
|
||||||
|
if "attention_mask" not in encoded_inputs:
|
||||||
if bos_token_id in required_input:
|
if bos_token_id in required_input:
|
||||||
context_length = required_input.index(bos_token_id)
|
context_length = required_input.index(bos_token_id)
|
||||||
else:
|
else:
|
||||||
|
@ -408,6 +409,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
||||||
attention_mask = np.bool_(attention_mask < 0.5)
|
attention_mask = np.bool_(attention_mask < 0.5)
|
||||||
encoded_inputs["attention_mask"] = attention_mask
|
encoded_inputs["attention_mask"] = attention_mask
|
||||||
|
|
||||||
|
if "position_ids" not in encoded_inputs:
|
||||||
position_ids = np.arange(seq_length, dtype=np.int64)
|
position_ids = np.arange(seq_length, dtype=np.int64)
|
||||||
mask_token = mask_token_id if mask_token_id in required_input else gmask_token_id
|
mask_token = mask_token_id if mask_token_id in required_input else gmask_token_id
|
||||||
if mask_token in required_input:
|
if mask_token in required_input:
|
||||||
|
|
Loading…
Reference in New Issue