diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..33c3c75 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1 @@ +{"<sep/>": 57522, "<s_iitcdip>": 57523, "<s_synthdog>": 57524} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..da0fa21 --- /dev/null +++ b/config.json @@ -0,0 +1,23 @@ +{ + "align_long_axis": true, + "architectures": [ + "DonutModel" + ], + "decoder_layer": 4, + "encoder_layer": [ + 2, + 2, + 14, + 2 + ], + "input_size": [ + 2560, + 1920 + ], + "max_length": 1536, + "max_position_embeddings": 1536, + "model_type": "donut", + "torch_dtype": "float32", + "transformers_version": "4.11.3", + "window_size": 10 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..606ad86 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21e8b5e708168f4f9885d18f8bc95ad6950439e7ac518161828ff0b27b984e8 +size 1018458179 diff --git a/sentencepiece.bpe.model b/sentencepiece.bpe.model new file mode 100644 index 0000000..95de5ce --- /dev/null +++ b/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d +size 1296245 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..08dc2b2 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, "additional_special_tokens": ["<s_iitcdip>", "<s_synthdog>"]} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e6070f9 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": 
{"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sp_model_kwargs": {}, "special_tokens_map_file": null, "tokenizer_file": "/root/.cache/huggingface/transformers/213c2041358e63047b407f94cde1ae23904d31a3bceb57eab291028c1e949437.7135a4b25ac726e19641f0d68803ff02bad960d6319064f55fa9c536929b86fc", "name_or_path": "hyunwoongko/asian-bart-ecjk", "tokenizer_class": "XLMRobertaTokenizer"} \ No newline at end of file