diff --git a/config.json b/config.json new file mode 100644 index 0000000..07fb2b6 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "THUDM/chatglm-6b", + "architectures": [ + "ChatGLMModel" + ], + "auto_map": { + "AutoConfig": "THUDM/chatglm-6b--configuration_chatglm.ChatGLMConfig", + "AutoModel": "THUDM/chatglm-6b--modeling_chatglm.ChatGLMForConditionalGeneration", + "AutoModelForSeq2SeqLM": "THUDM/chatglm-6b--modeling_chatglm.ChatGLMForConditionalGeneration" + }, + "bos_token_id": 130004, + "eos_token_id": 130005, + "gmask_token_id": 130001, + "hidden_size": 4096, + "inner_hidden_size": 16384, + "layernorm_epsilon": 1e-05, + "mask_token_id": 130000, + "max_sequence_length": 2048, + "model_type": "chatglm", + "num_attention_heads": 32, + "num_layers": 28, + "pad_token_id": 3, + "position_encoding_2d": true, + "pre_seq_len": null, + "prefix_projection": false, + "quantization_bit": 0, + "torch_dtype": "float16", + "transformers_version": "4.29.2", + "use_cache": true, + "vocab_size": 130528 +} diff --git a/ice_text.model b/ice_text.model new file mode 100644 index 0000000..5dbadd3 Binary files /dev/null and b/ice_text.model differ diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..1f897c9 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "bos_token": "<sop>", + "eos_token": "<eop>", + "mask_token": "[MASK]", + "pad_token": "<pad>", + "unk_token": "<unk>" +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..6fc7e68 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,22 @@ +{ + "auto_map": { + "AutoTokenizer": [ + "THUDM/chatglm-6b--tokenization_chatglm.ChatGLMTokenizer", + null + ] + }, + "bos_token": "<sop>", + "clean_up_tokenization_spaces": true, + "do_lower_case": false, + "end_token": "</s>", + "eos_token": "<eop>", + "gmask_token": "[gMASK]", + "mask_token": "[MASK]", + "model_max_length": 2048, + "num_image_tokens": 0, + "pad_token": "<pad>", + "padding_side": "left", 
+ "remove_space": false, + "tokenizer_class": "ChatGLMTokenizer", + "unk_token": "<unk>" +}