Upload pytorch_model.bin

Update slack link
Update decode method in tokenizer
2023-05-15 12:41:28 +00:00 · 2023-05-12 13:49:56 +00:00 · 2023-05-09 11:32:40 +08:00 · 2023-05-04 21:45:31 +02:00 · 2023-04-29 10:34:45 +08:00 · 2023-04-28 20:17:25 +08:00
6 changed files with 67 additions and 37 deletions
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ tags:
 ---
 # ChatGLM-6B-INT4
 <p align="center">
-    👋 Join our <a href="https://join.slack.com/t/chatglm/shared_invite/zt-1t4a8evfn-vduo2hhNcYqBUnZ71IXiqQ" target="_blank">Slack</a> and <a href="https://github.com/THUDM/ChatGLM-6B/blob/main/resources/WECHAT.md" target="_blank">WeChat</a>
+    👋 Join our <a href="https://join.slack.com/t/chatglm/shared_invite/zt-1udqapmrr-ocT1DS_mxWe6dDY8ahRWzg" target="_blank">Slack</a> and <a href="https://github.com/THUDM/ChatGLM-6B/blob/main/resources/WECHAT.md" target="_blank">WeChat</a>
 </p>

 ## 介绍
--- a/config.json
+++ b/config.json
@@ -10,16 +10,16 @@
  },
  "bos_token_id": 130004,
  "eos_token_id": 130005,
+  "mask_token_id": 130000,
  "gmask_token_id": 130001,
+  "pad_token_id": 3,
  "hidden_size": 4096,
  "inner_hidden_size": 16384,
  "layernorm_epsilon": 1e-05,
-  "mask_token_id": 130000,
  "max_sequence_length": 2048,
  "model_type": "chatglm",
  "num_attention_heads": 32,
  "num_layers": 28,
-  "pad_token_id": 3,
  "position_encoding_2d": true,
  "quantization_bit": 4,
  "quantization_embeddings": false,
--- a/modeling_chatglm.py
+++ b/modeling_chatglm.py
@@ -918,7 +918,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
        elif input_ids is not None:
            batch_size, seq_length = input_ids.shape[:2]
        elif inputs_embeds is not None:
-            batch_size, seq_length, _ = inputs_embeds.shape[:2]
+            batch_size, seq_length = inputs_embeds.shape[:2]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

@@ -972,9 +972,8 @@ class ChatGLMModel(ChatGLMPreTrainedModel):

        if attention_mask is None:
            attention_mask = torch.zeros(1, 1, device=input_ids.device).bool()
-
        else:
-            attention_mask = attention_mask.to(input_ids.device)
+            attention_mask = attention_mask.to(hidden_states.device)

        for i, layer in enumerate(self.layers):

--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35828b49cf23cbae4c27788d4b04fc68c79a276300e09f14d72a49b0b738b4a9
+oid sha256:245786435bde9f4593c105ea846fa461fe42bc63c12b738d0272fcaed6276645
 size 3893083075
--- a/quantization.py
+++ b/quantization.py
--- a/tokenization_chatglm.py
+++ b/tokenization_chatglm.py
@@ -31,6 +31,9 @@ class TextTokenizer:
    def tokenize(self, text):
        return self.sp.EncodeAsPieces(text)

+    def convert_tokens_to_string(self, tokens):
+        return self.sp.DecodePieces(tokens)
+
    def convert_tokens_to_ids(self, tokens):
        return [self.sp.PieceToId(token) for token in tokens]

@@ -111,16 +114,25 @@ class SPTokenizer:
        tokens = [x + self.num_image_tokens for x in tmp]
        return tokens if add_dummy_prefix else tokens[2:]

-    def decode(self, text_ids: List[int]) -> str:
-        ids = [int(_id) - self.num_image_tokens for _id in text_ids]
-        ids = [_id for _id in ids if _id >= 0]
-        text = self._get_text_tokenizer().decode(ids)
+    def postprocess(self, text):
        text = text.replace("<n>", "\n")
        text = text.replace(SPTokenizer.get_tab_token(), "\t")
        for i in range(2, self.max_blank_length + 1):
            text = text.replace(self.get_blank_token(i), " " * i)
        return text

+    def decode(self, text_ids: List[int]) -> str:
+        ids = [int(_id) - self.num_image_tokens for _id in text_ids]
+        ids = [_id for _id in ids if _id >= 0]
+        text = self._get_text_tokenizer().decode(ids)
+        text = self.postprocess(text)
+        return text
+
+    def decode_tokens(self, tokens: List[str]) -> str:
+        text = self._get_text_tokenizer().convert_tokens_to_string(tokens)
+        text = self.postprocess(text)
+        return text
+
    def tokenize(
            self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
    ) -> List[str]:
@@ -256,11 +268,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):

        return seq

+    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+        return self.sp_tokenizer.decode_tokens(tokens)
+
    def _decode(
            self,
            token_ids: Union[int, List[int]],
-            skip_special_tokens: bool = False,
-            clean_up_tokenization_spaces: bool = True,
            **kwargs
    ) -> str:
        if isinstance(token_ids, int):
@@ -269,7 +282,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            return ""
        if self.pad_token_id in token_ids:  # remove pad
            token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
-        return self.sp_tokenizer.decode(token_ids)
+        return super()._decode(token_ids, **kwargs)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
Author	SHA1	Message	Date
Zhengxiao Du	02a065cf27	Upload pytorch_model.bin	2023-05-15 12:41:28 +00:00
Zhengxiao Du	e214c5b71d	Update slack link	2023-05-12 13:49:56 +00:00
duzx16	d8a6cfc6cb	Update decode method in tokenizer	2023-05-09 11:32:40 +08:00
duzx16	f6b88da8c1	Add support for parallel quantization on Mac	2023-05-04 21:45:31 +02:00
duzx16	63d66b0572	Remove assert in load_cpu_kernel	2023-04-29 10:34:45 +08:00
duzx16	f55a1089a2	Sync with chatglm-6b	2023-04-28 20:17:25 +08:00
duzx16	e02ba894cf	Remove pytorch_model.bin.index.json	2023-04-17 21:34:20 +08:00
duzx16	6498797e79	Update slack link	2023-04-17 16:08:53 +08:00
duzx16	1e40d965fe	Add pytorch_model.bin.index.json	2023-04-16 21:04:30 +08:00
songxxzp	630d0efd8b	Add assertion when loading cpu and cuda kernel fails	2023-04-14 20:00:41 +08:00
songxxzp	bcc35f08b4	Add assertion when loading cpu and cuda kernel fails	2023-04-14 19:58:42 +08:00
songxxzp	fe0674f86d	Merge branch 'dev'	2023-04-14 19:01:28 +08:00