Upload processor

Commit 9dca1f007f by Tao Jin, 2022-11-04 22:11:33 +00:00 (committed by huggingface-web)
Parent: c0d929c00a
6 changed files with 230436 additions and 0 deletions

added_tokens.json (new file, +12 lines)

@@ -0,0 +1,12 @@
{
"</s_answer>": 57530,
"</s_question>": 57528,
"<no/>": 57526,
"<s_answer>": 57529,
"<s_docvqa>": 57531,
"<s_iitcdip>": 57523,
"<s_question>": 57527,
"<s_synthdog>": 57524,
"<sep/>": 57522,
"<yes/>": 57525
}
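These ids extend the base XLM-RoBERTa vocabulary with Donut's prompt and delimiter tokens: <s_iitcdip> and <s_synthdog> name the pretraining corpora, while <s_docvqa>, <s_question>/</s_question>, and <s_answer>/</s_answer> frame DocVQA prompts and outputs. A minimal sketch of how the DocVQA decoder prompt is built from them; the repository id here is an assumption for illustration, substitute this repo's actual path:

```python
from transformers import DonutProcessor

# Assumed repo id for illustration; use this repository's actual path.
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")

# The ids match added_tokens.json above.
print(processor.tokenizer.convert_tokens_to_ids("<s_docvqa>"))   # 57531
print(processor.tokenizer.convert_tokens_to_ids("</s_answer>"))  # 57530

# Donut primes the decoder with a task prompt built from these tokens:
question = "What is the invoice number?"
prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"
decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False).input_ids
```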

preprocessor_config.json (new file, +24 lines)

@@ -0,0 +1,24 @@
{
"do_align_long_axis": false,
"do_normalize": true,
"do_pad": true,
"do_resize": true,
"do_thumbnail": true,
"feature_extractor_type": "DonutFeatureExtractor",
"image_mean": [
0.5,
0.5,
0.5
],
"image_std": [
0.5,
0.5,
0.5
],
"processor_class": "DonutProcessor",
"resample": 2,
"size": [
960,
1280
]
}
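These settings configure the DonutFeatureExtractor pipeline: resize to a 960x1280 (width x height) canvas with bilinear resampling (resample=2), thumbnail and pad to that fixed size, skip long-axis alignment, and normalize each channel with mean and std 0.5. A minimal sketch of applying them through the processor; the repo id and image path are assumptions:

```python
from PIL import Image
from transformers import DonutProcessor

# Assumed repo id and file name for illustration.
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
image = Image.open("document.png").convert("RGB")  # hypothetical input scan

# Resizes, thumbnails, pads, and normalizes per preprocessor_config.json above.
pixel_values = processor(image, return_tensors="pt").pixel_values
print(pixel_values.shape)  # expected: torch.Size([1, 3, 1280, 960])
```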

sentencepiece.bpe.model (new binary file, stored with Git LFS; binary diff not shown)

special_tokens_map.json (new file, +19 lines)

@@ -0,0 +1,19 @@
{
"additional_special_tokens": [
"<s_iitcdip>",
"<s_synthdog>"
],
"bos_token": "<s>",
"cls_token": "<s>",
"eos_token": "</s>",
"mask_token": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false
},
"pad_token": "<pad>",
"sep_token": "</s>",
"unk_token": "<unk>"
}
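This map pins the standard XLM-RoBERTa special tokens (bos/cls <s>, eos/sep </s>, <pad>, <unk>, and the <mask> AddedToken) and registers the two pretraining prompt tokens as additional special tokens so the tokenizer never splits them. A quick check, under the same assumed repo id as above:

```python
from transformers import DonutProcessor

processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")  # assumed repo id
tok = processor.tokenizer
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # <s> </s> <pad> <unk>
print(tok.additional_special_tokens)  # includes '<s_iitcdip>' and '<s_synthdog>'
```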

tokenizer.json (new file, +230357 lines; file diff suppressed because one or more lines are too long)

tokenizer_config.json (new file, +21 lines)

@@ -0,0 +1,21 @@
{
"bos_token": "<s>",
"cls_token": "<s>",
"eos_token": "</s>",
"mask_token": {
"__type": "AddedToken",
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false
},
"name_or_path": "naver-clova-ix/donut-base",
"pad_token": "<pad>",
"processor_class": "DonutProcessor",
"sep_token": "</s>",
"sp_model_kwargs": {},
"special_tokens_map_file": null,
"tokenizer_class": "XLMRobertaTokenizer",
"unk_token": "<unk>"
}
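Together, these files define the complete DonutProcessor: a DonutFeatureExtractor for images plus an XLMRobertaTokenizer built on sentencepiece.bpe.model, derived from naver-clova-ix/donut-base. A minimal end-to-end DocVQA sketch under the same assumptions; the repo id, image path, and question are illustrative:

```python
import re
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel

repo = "naver-clova-ix/donut-base-finetuned-docvqa"  # assumed repo id
processor = DonutProcessor.from_pretrained(repo)
model = VisionEncoderDecoderModel.from_pretrained(repo)

image = Image.open("invoice.png").convert("RGB")  # hypothetical document scan
pixel_values = processor(image, return_tensors="pt").pixel_values

question = "What is the total amount?"
prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"
decoder_input_ids = processor.tokenizer(
    prompt, add_special_tokens=False, return_tensors="pt"
).input_ids

outputs = model.generate(
    pixel_values,
    decoder_input_ids=decoder_input_ids,
    max_length=model.decoder.config.max_position_embeddings,
    pad_token_id=processor.tokenizer.pad_token_id,
    eos_token_id=processor.tokenizer.eos_token_id,
    use_cache=True,
    bad_words_ids=[[processor.tokenizer.unk_token_id]],
)

sequence = processor.batch_decode(outputs)[0]
sequence = sequence.replace(processor.tokenizer.eos_token, "")
sequence = sequence.replace(processor.tokenizer.pad_token, "")
sequence = re.sub(r"<.*?>", "", sequence, count=1)  # drop the leading task token
print(processor.token2json(sequence))  # e.g. {'question': ..., 'answer': ...}
```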