From bf3fc5e1c5b16851d1cf1501f0fb51dee4a9121e Mon Sep 17 00:00:00 2001 From: jeanpoll Date: Fri, 12 Mar 2021 09:25:57 -0500 Subject: [PATCH] first release --- .gitattributes | 1 + config.json | 38 ++++++++++++++++++++++++++++++++++++++ pytorch_model.bin | 3 +++ sentencepiece.bpe.model | 3 +++ special_tokens_map.json | 1 + tokenizer_config.json | 1 + 6 files changed, 47 insertions(+) create mode 100644 config.json create mode 100644 pytorch_model.bin create mode 100644 sentencepiece.bpe.model create mode 100644 special_tokens_map.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes index 07f0db3..afbad77 100644 --- a/.gitattributes +++ b/.gitattributes @@ -14,3 +14,4 @@ *.pb filter=lfs diff=lfs merge=lfs -text *.pt filter=lfs diff=lfs merge=lfs -text *.pth filter=lfs diff=lfs merge=lfs -text +pytorch_model.bin filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000..09571de --- /dev/null +++ b/config.json @@ -0,0 +1,38 @@ +{ + "_name_or_path": "camembert-base", + "architectures": [ + "CamembertForTokenClassification" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 5, + "eos_token_id": 6, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "id2label": { + "0": "O", + "1": "LOC", + "2": "PER", + "3": "MISC", + "4": "ORG" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "LOC": 1, + "MISC": 3, + "O": 0, + "ORG": 4, + "PER": 2 + }, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "camembert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "type_vocab_size": 1, + "vocab_size": 32005 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..9305433 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:72ac83b9b1ded370c85082c67735f7ef250a333aa37e6b3434b0dc0b210b1988 +size 440227047 diff --git a/sentencepiece.bpe.model b/sentencepiece.bpe.model new file mode 100644 index 0000000..489f0a5 --- /dev/null +++ b/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:988bc5a00281c6d210a5d34bd143d0363741a432fefe741bf71e61b1869d4314 +size 810912 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..b74bdc9 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>", "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"]} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..092c1a2 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "additional_special_tokens": ["<s>NOTUSED", "</s>NOTUSED"], "model_max_length": 512, "name_or_path": "camembert-base"} \ No newline at end of file