Remove image tokens when decoding
This commit is contained in:
parent
0d8b08d1bb
commit
8492687842
|
@@ -130,6 +130,7 @@ class SPTokenizer:
|
||||||
|
|
||||||
def decode(self, text_ids: List[int], special_tokens=False) -> str:
|
def decode(self, text_ids: List[int], special_tokens=False) -> str:
|
||||||
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
|
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
|
||||||
|
ids = [_id for _id in ids if _id >= 0]
|
||||||
text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
|
text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
|
||||||
text = text.replace("<n>", "\n")
|
text = text.replace("<n>", "\n")
|
||||||
text = text.replace(SPTokenizer.get_tab_token(), "\t")
|
text = text.replace(SPTokenizer.get_tab_token(), "\t")
|
||||||
|
|
Loading…
Reference in New Issue