Remove image tokens when decoding

This commit is contained in:
duzx16 2023-03-16 00:24:42 +08:00
parent 0d8b08d1bb
commit 8492687842
1 changed file with 1 addition and 0 deletions

View File

@@ -130,6 +130,7 @@ class SPTokenizer:
def decode(self, text_ids: List[int], special_tokens=False) -> str:
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
ids = [_id for _id in ids if _id >= 0]
text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
text = text.replace("<n>", "\n")
text = text.replace(SPTokenizer.get_tab_token(), "\t")