diff --git a/config.json b/config.json new file mode 100644 index 0000000..b97e30c --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "temp/dummy/layoutlm/LayoutLMForQuestionAnswering", + "architectures": [ + "LayoutLMForQuestionAnswering" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 37, + "layer_norm_eps": 1e-12, + "max_2d_position_embeddings": 1024, + "max_position_embeddings": 512, + "model_type": "layoutlm", + "num_attention_heads": 4, + "num_hidden_layers": 5, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.25.0.dev0", + "type_vocab_size": 16, + "use_cache": true, + "vocab_size": 1124 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..6b117d7 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d023aa70c11777330baf1da3c4bb212f283a9028d14723fa222d23cbb78206 +size 910667 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..a8b3208 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,7 @@ +{ + "cls_token": "[CLS]", + "mask_token": "[MASK]", + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "unk_token": "[UNK]" +} diff --git a/tf_model.h5 b/tf_model.h5 new file mode 100644 index 0000000..995f003 --- /dev/null +++ b/tf_model.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5296cbc0e03f8a5cf90c90381d3911054dbd8f48e399e57a1e22e088f26fb1b8 +size 1010128 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..c54b385 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,1274 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[CLS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[SEP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "[MASK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "BertNormalizer", + "clean_text": true, + "handle_chinese_chars": true, + "strip_accents": null, + "lowercase": true + }, + "pre_tokenizer": { + "type": "BertPreTokenizer" + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 2 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 3 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": { + "type": "WordPiece", + "prefix": "##", + "cleanup": true + }, + "model": { + "type": "WordPiece", + "unk_token": "[UNK]", + "continuing_subword_prefix": "##", + "max_input_chars_per_word": 100, + "vocab": { + "[PAD]": 0, + "[UNK]": 1, + "[CLS]": 2, + "[SEP]": 3, + "[MASK]": 4, + "!": 5, + "\"": 6, + "#": 7, + "$": 8, + "%": 9, + "&": 10, + "'": 11, + "(": 12, + ")": 13, + "*": 14, + "+": 15, + ",": 16, + "-": 17, + ".": 18, + "/": 19, + "0": 20, + "1": 21, + "2": 22, + "3": 23, + "4": 24, + "5": 25, + "6": 26, + "7": 27, + "8": 28, + "9": 29, + ":": 30, + ";": 31, + "<": 32, + "=": 33, + ">": 34, + "?": 35, + "@": 36, + "[": 37, + "\\": 38, + "]": 39, + "^": 40, + "_": 41, + "`": 42, + "a": 43, + "b": 44, + "c": 45, + "d": 46, + "e": 47, + "f": 48, + "g": 49, + "h": 50, + "i": 51, + "j": 52, + "k": 53, + "l": 54, + "m": 55, + "n": 56, + "o": 57, + "p": 58, + "q": 59, + "r": 60, + "s": 61, + "t": 62, + "u": 63, + "v": 64, + "w": 65, + "x": 66, + "y": 67, + "z": 68, + "|": 69, + "}": 70, + "~": 71, + "¡": 72, + "¢": 73, + "£": 74, + "¥": 75, + "§": 76, + "°": 77, + "±": 78, + "²": 79, + "³": 80, + "´": 81, + "µ": 82, + "·": 83, + "º": 84, + "½": 85, + "¿": 86, + "×": 87, + "ß": 88, + "æ": 89, + "ð": 90, + "ø": 91, + "þ": 92, + "đ": 93, + "ħ": 94, + "ı": 95, + "ł": 96, + "œ": 97, + "ɐ": 98, + "ɑ": 99, + "ɒ": 100, + "ɔ": 101, + "ə": 102, + "ɛ": 103, + "ɜ": 104, + "ɡ": 105, + "ɢ": 106, + "ɪ": 107, + "ɫ": 108, + "ɳ": 109, + "ɽ": 110, + "ɾ": 111, + "ʁ": 112, + "ʃ": 113, + "ʊ": 114, + "ʋ": 115, + "ʒ": 116, + "ʔ": 117, + "ʕ": 118, + "ʲ": 119, + "ʻ": 120, + "ʼ": 121, + "ʾ": 122, + "ʿ": 123, + "ˈ": 124, + "ˌ": 125, + "ː": 126, + "α": 127, + "β": 128, + "γ": 129, + "δ": 130, + "ε": 131, + "η": 132, + "θ": 133, + "ι": 134, + "κ": 135, + "λ": 136, + "μ": 137, + "ν": 138, + "ξ": 139, + "ο": 140, + "π": 141, + "ρ": 142, + "ς": 143, + "σ": 144, + "τ": 145, + "υ": 146, + "φ": 147, + "χ": 148, + "ψ": 149, + "ω": 150, + "а": 151, + "б": 152, + "в": 153, + "г": 154, + "д": 155, + "е": 156, + "ж": 157, + "з": 158, + "и": 159, + "к": 160, + "л": 161, + "м": 162, + "н": 163, + "о": 164, + "п": 165, + "р": 166, + "с": 167, + "т": 168, + "у": 169, + "х": 170, + "ц": 171, + "ш": 172, + "ъ": 173, + "ы": 174, + "ь": 175, + "ю": 176, + "я": 177, + "є": 178, + "א": 179, + "ב": 180, + "ג": 181, + "ה": 182, + "ו": 183, + "ז": 184, + "ח": 185, + "י": 186, + "ל": 187, + "ם": 188, + "מ": 189, + "ן": 190, + "נ": 191, + "ס": 192, + "ף": 193, + "פ": 194, + "צ": 195, + "ר": 196, + "ש": 197, + "ת": 198, + "ء": 199, + "ا": 200, + "ب": 201, + "ة": 202, + "ت": 203, + "ث": 204, + "ج": 205, + "ح": 206, + "خ": 207, + "د": 208, + "ذ": 209, + "ر": 210, + "س": 211, + "ش": 212, + "ص": 213, + "ع": 214, + "ف": 215, + "ق": 216, + "ك": 217, + "ل": 218, + "م": 219, + "ن": 220, + "ه": 221, + "و": 222, + "ي": 223, + "ܐ": 224, + "ܕ": 225, + "ܗ": 226, + "ܝ": 227, + "ܠ": 228, + "ܢ": 229, + "ܬ": 230, + "अ": 231, + "ई": 232, + "क": 233, + "ग": 234, + "ण": 235, + "त": 236, + "द": 237, + "न": 238, + "प": 239, + "ब": 240, + "म": 241, + "य": 242, + "र": 243, + "ल": 244, + "व": 245, + "स": 246, + "ह": 247, + "ा": 248, + "ि": 249, + "আ": 250, + "ল": 251, + "হ": 252, + "া": 253, + "ਅ": 254, + "ਲ": 255, + "ਹ": 256, + "ਾ": 257, + "അ": 258, + "ള": 259, + "ഹ": 260, + "ാ": 261, + "ก": 262, + "ค": 263, + "ง": 264, + "ช": 265, + "ซ": 266, + "ญ": 267, + "ฐ": 268, + "ณ": 269, + "ด": 270, + "ต": 271, + "น": 272, + "บ": 273, + "ป": 274, + "พ": 275, + "ภ": 276, + "ม": 277, + "ย": 278, + "ร": 279, + "ล": 280, + "ว": 281, + "ศ": 282, + "ษ": 283, + "ส": 284, + "ห": 285, + "อ": 286, + "ฮ": 287, + "ะ": 288, + "า": 289, + "เ": 290, + "แ": 291, + "ไ": 292, + "ა": 293, + "ბ": 294, + "გ": 295, + "დ": 296, + "ე": 297, + "ვ": 298, + "ზ": 299, + "თ": 300, + "ი": 301, + "კ": 302, + "ლ": 303, + "მ": 304, + "ნ": 305, + "ო": 306, + "პ": 307, + "ჟ": 308, + "რ": 309, + "ს": 310, + "ტ": 311, + "უ": 312, + "ფ": 313, + "ქ": 314, + "ღ": 315, + "ყ": 316, + "შ": 317, + "ჩ": 318, + "ც": 319, + "ძ": 320, + "წ": 321, + "ჭ": 322, + "ხ": 323, + "ჯ": 324, + "ჰ": 325, + "ჱ": 326, + "ჲ": 327, + "ჳ": 328, + "ჴ": 329, + "ჵ": 330, + "ჶ": 331, + "ჷ": 332, + "ჸ": 333, + "ჹ": 334, + "ჺ": 335, + "჻": 336, + "ᄃ": 337, + "ᄅ": 338, + "ᄇ": 339, + "ᄋ": 340, + "ᄌ": 341, + "ᅡ": 342, + "ᅢ": 343, + "ᅦ": 344, + "ᅧ": 345, + "ᅩ": 346, + "ᅮ": 347, + "ᅵ": 348, + "ᆨ": 349, + "ᆫ": 350, + "ᆯ": 351, + "ᆸ": 352, + "ᆼ": 353, + "ᵻ": 354, + "‐": 355, + "‑": 356, + "–": 357, + "—": 358, + "―": 359, + "‘": 360, + "’": 361, + "“": 362, + "”": 363, + "„": 364, + "†": 365, + "‡": 366, + "•": 367, + "…": 368, + "′": 369, + "″": 370, + "⁄": 371, + "₣": 372, + "₤": 373, + "€": 374, + "₹": 375, + "⅓": 376, + "⅔": 377, + "→": 378, + "−": 379, + "≡": 380, + "≤": 381, + "①": 382, + "☉": 383, + "☫": 384, + "♀": 385, + "♭": 386, + "♯": 387, + "⚳": 388, + "ⴀ": 389, + "ⴂ": 390, + "ⴃ": 391, + "ⴈ": 392, + "ⴌ": 393, + "ⴕ": 394, + "ⴟ": 395, + "〈": 396, + "〉": 397, + "〜": 398, + "あ": 399, + "い": 400, + "う": 401, + "お": 402, + "か": 403, + "き": 404, + "く": 405, + "け": 406, + "こ": 407, + "さ": 408, + "し": 409, + "す": 410, + "せ": 411, + "た": 412, + "ち": 413, + "っ": 414, + "つ": 415, + "と": 416, + "な": 417, + "に": 418, + "の": 419, + "は": 420, + "ひ": 421, + "ふ": 422, + "ほ": 423, + "ま": 424, + "み": 425, + "め": 426, + "も": 427, + "ゃ": 428, + "ゆ": 429, + "ょ": 430, + "ら": 431, + "り": 432, + "る": 433, + "れ": 434, + "わ": 435, + "を": 436, + "ん": 437, + "ァ": 438, + "ア": 439, + "ィ": 440, + "イ": 441, + "ゥ": 442, + "ウ": 443, + "ェ": 444, + "エ": 445, + "ォ": 446, + "オ": 447, + "カ": 448, + "キ": 449, + "ク": 450, + "ケ": 451, + "コ": 452, + "サ": 453, + "シ": 454, + "ス": 455, + "セ": 456, + "タ": 457, + "チ": 458, + "ッ": 459, + "ツ": 460, + "テ": 461, + "ト": 462, + "ナ": 463, + "ニ": 464, + "ネ": 465, + "ノ": 466, + "ハ": 467, + "フ": 468, + "ヘ": 469, + "マ": 470, + "ミ": 471, + "ム": 472, + "モ": 473, + "ャ": 474, + "ュ": 475, + "ョ": 476, + "ラ": 477, + "リ": 478, + "ル": 479, + "レ": 480, + "ロ": 481, + "ン": 482, + "・": 483, + "ー": 484, + "一": 485, + "七": 486, + "下": 487, + "世": 488, + "丙": 489, + "中": 490, + "主": 491, + "乃": 492, + "之": 493, + "乙": 494, + "九": 495, + "二": 496, + "云": 497, + "人": 498, + "今": 499, + "付": 500, + "作": 501, + "侗": 502, + "依": 503, + "信": 504, + "傳": 505, + "儚": 506, + "充": 507, + "光": 508, + "全": 509, + "兵": 510, + "其": 511, + "具": 512, + "円": 513, + "再": 514, + "出": 515, + "判": 516, + "前": 517, + "剛": 518, + "劇": 519, + "劉": 520, + "動": 521, + "化": 522, + "北": 523, + "华": 524, + "厂": 525, + "去": 526, + "古": 527, + "可": 528, + "台": 529, + "史": 530, + "同": 531, + "名": 532, + "君": 533, + "吳": 534, + "周": 535, + "命": 536, + "和": 537, + "咲": 538, + "善": 539, + "四": 540, + "國": 541, + "園": 542, + "圣": 543, + "在": 544, + "坂": 545, + "堤": 546, + "場": 547, + "塘": 548, + "夕": 549, + "大": 550, + "天": 551, + "夫": 552, + "女": 553, + "妙": 554, + "姚": 555, + "子": 556, + "孟": 557, + "守": 558, + "安": 559, + "宋": 560, + "完": 561, + "宗": 562, + "宝": 563, + "宫": 564, + "寝": 565, + "寺": 566, + "小": 567, + "少": 568, + "尾": 569, + "山": 570, + "岳": 571, + "川": 572, + "州": 573, + "巳": 574, + "市": 575, + "師": 576, + "平": 577, + "广": 578, + "庆": 579, + "府": 580, + "座": 581, + "廬": 582, + "建": 583, + "式": 584, + "張": 585, + "彌": 586, + "彩": 587, + "彼": 588, + "後": 589, + "御": 590, + "德": 591, + "思": 592, + "愛": 593, + "憑": 594, + "憶": 595, + "應": 596, + "懷": 597, + "战": 598, + "戦": 599, + "扈": 600, + "技": 601, + "拉": 602, + "拳": 603, + "挑": 604, + "揺": 605, + "攻": 606, + "放": 607, + "政": 608, + "散": 609, + "斯": 610, + "方": 611, + "日": 612, + "旦": 613, + "旭": 614, + "昌": 615, + "明": 616, + "星": 617, + "春": 618, + "晋": 619, + "景": 620, + "曦": 621, + "月": 622, + "望": 623, + "未": 624, + "本": 625, + "李": 626, + "村": 627, + "杜": 628, + "束": 629, + "来": 630, + "林": 631, + "桜": 632, + "梶": 633, + "棘": 634, + "椎": 635, + "楊": 636, + "楚": 637, + "榮": 638, + "橘": 639, + "機": 640, + "正": 641, + "殻": 642, + "殿": 643, + "母": 644, + "水": 645, + "汉": 646, + "沂": 647, + "沙": 648, + "河": 649, + "泗": 650, + "波": 651, + "泣": 652, + "洪": 653, + "淹": 654, + "清": 655, + "湯": 656, + "漢": 657, + "澄": 658, + "澤": 659, + "火": 660, + "灯": 661, + "灵": 662, + "灼": 663, + "焼": 664, + "熱": 665, + "物": 666, + "狐": 667, + "狸": 668, + "玄": 669, + "王": 670, + "玩": 671, + "珂": 672, + "珙": 673, + "球": 674, + "理": 675, + "琦": 676, + "琪": 677, + "瓊": 678, + "生": 679, + "田": 680, + "畢": 681, + "番": 682, + "瘡": 683, + "白": 684, + "皮": 685, + "真": 686, + "砲": 687, + "礮": 688, + "祈": 689, + "神": 690, + "祠": 691, + "秋": 692, + "空": 693, + "立": 694, + "精": 695, + "約": 696, + "絵": 697, + "織": 698, + "義": 699, + "翠": 700, + "者": 701, + "耕": 702, + "肖": 703, + "胡": 704, + "膀": 705, + "臂": 706, + "興": 707, + "良": 708, + "花": 709, + "芳": 710, + "芽": 711, + "若": 712, + "英": 713, + "藕": 714, + "藥": 715, + "蘄": 716, + "蘇": 717, + "行": 718, + "裁": 719, + "規": 720, + "覺": 721, + "观": 722, + "解": 723, + "記": 724, + "誓": 725, + "誡": 726, + "誰": 727, + "謎": 728, + "许": 729, + "谭": 730, + "豪": 731, + "豫": 732, + "費": 733, + "贵": 734, + "赤": 735, + "趙": 736, + "足": 737, + "跡": 738, + "転": 739, + "辛": 740, + "逆": 741, + "遇": 742, + "運": 743, + "過": 744, + "遠": 745, + "選": 746, + "邦": 747, + "邱": 748, + "部": 749, + "郭": 750, + "都": 751, + "酈": 752, + "里": 753, + "野": 754, + "金": 755, + "銃": 756, + "鋼": 757, + "錄": 758, + "錡": 759, + "鍵": 760, + "鐵": 761, + "钱": 762, + "铁": 763, + "關": 764, + "防": 765, + "阿": 766, + "陈": 767, + "陳": 768, + "陽": 769, + "隊": 770, + "階": 771, + "集": 772, + "雪": 773, + "雲": 774, + "霖": 775, + "霹": 776, + "靂": 777, + "韓": 778, + "願": 779, + "顯": 780, + "颜": 781, + "马": 782, + "高": 783, + "龍": 784, + "ﷲ": 785, + "ﻋ": 786, + "/": 787, + "3": 788, + "~": 789, + "##w": 790, + "##i": 791, + "##s": 792, + "##e": 793, + "##n": 794, + "##r": 795, + "##a": 796, + "##l": 797, + "##u": 798, + "##k": 799, + "##m": 800, + "##o": 801, + "##g": 802, + "##p": 803, + "##t": 804, + "##v": 805, + "##y": 806, + "##d": 807, + "##c": 808, + "##h": 809, + "##f": 810, + "##b": 811, + "##4": 812, + "##j": 813, + "##の": 814, + "##z": 815, + "##1": 816, + "##5": 817, + "##6": 818, + "##0": 819, + "##7": 820, + "##9": 821, + "##q": 822, + "##ノ": 823, + "##ー": 824, + "##ɽ": 825, + "##3": 826, + "##2": 827, + "##8": 828, + "##x": 829, + "##ن": 830, + "##ا": 831, + "##ء": 832, + "##ɑ": 833, + "##ː": 834, + "##ə": 835, + "##უ": 836, + "##ს": 837, + "##ხ": 838, + "##რ": 839, + "##ი": 840, + "##お": 841, + "##り": 842, + "##ل": 843, + "##ع": 844, + "##ة": 845, + "##ォ": 846, + "##ル": 847, + "##ト": 848, + "##ゥ": 849, + "##ナ": 850, + "##ł": 851, + "##ʔ": 852, + "##ɪ": 853, + "##ʁ": 854, + "##ʻ": 855, + "##อ": 856, + "##ก": 857, + "##ว": 858, + "##ร": 859, + "##ス": 860, + "##テ": 861, + "##ィ": 862, + "##ニ": 863, + "##ᵻ": 864, + "##ˈ": 865, + "##ʊ": 866, + "##व": 867, + "##ा": 868, + "##ह": 869, + "##ि": 870, + "##क": 871, + "##ʃ": 872, + "##い": 873, + "##α": 874, + "##ν": 875, + "##χ": 876, + "##ε": 877, + "##ρ": 878, + "##η": 879, + "##ς": 880, + "##は": 881, + "##ه": 882, + "##ø": 883, + "##ɜ": 884, + "##о": 885, + "##в": 886, + "##е": 887, + "##т": 888, + "##с": 889, + "##к": 890, + "##а": 891, + "##я": 892, + "##ب": 893, + "##ص": 894, + "##ξ": 895, + "##エ": 896, + "##タ": 897, + "##イ": 898, + "##р": 899, + "##и": 900, + "##н": 901, + "##な": 902, + "##れ": 903, + "##る": 904, + "##л": 905, + "##у": 906, + "##ق": 907, + "##د": 908, + "##س": 909, + "##ण": 910, + "##स": 911, + "##न": 912, + "##ו": 913, + "##פ": 914, + "##ה": 915, + "##λ": 916, + "##τ": 917, + "##ذ": 918, + "##ي": 919, + "##う": 920, + "##き": 921, + "##つ": 922, + "##ο": 923, + "##υ": 924, + "##ო": 925, + "##く": 926, + "##ı": 927, + "##レ": 928, + "##ッ": 929, + "##チ": 930, + "##ल": 931, + "##ב": 932, + "##ת": 933, + "##א": 934, + "##י": 935, + "##з": 936, + "##ˌ": 937, + "##æ": 938, + "##ʿ": 939, + "##ई": 940, + "##ы": 941, + "##π": 942, + "##ι": 943, + "##κ": 944, + "##พ": 945, + "##م": 946, + "##ت": 947, + "##र": 948, + "##द": 949, + "##ラ": 950, + "##あ": 951, + "##ე": 952, + "##ლ": 953, + "##მ": 954, + "##წ": 955, + "##ფ": 956, + "##ל": 957, + "##σ": 958, + "##ן": 959, + "##ɔ": 960, + "##ش": 961, + "##و": 962, + "##ゆ": 963, + "##ग": 964, + "##ち": 965, + "##ゃ": 966, + "##ん": 967, + "##フ": 968, + "##ס": 969, + "##ツ": 970, + "##ネ": 971, + "##ク": 972, + "##シ": 973, + "##ョ": 974, + "##ン": 975, + "##す": 976, + "##め": 977, + "##ტ": 978, + "##º": 979, + "##か": 980, + "##た": 981, + "##đ": 982, + "##マ": 983, + "##נ": 984, + "##ハ": 985, + "##μ": 986, + "##ر": 987, + "##ᅦ": 988, + "##ᄃ": 989, + "##ᅩ": 990, + "##ᆨ": 991, + "##ᄅ": 992, + "##ᅵ": 993, + "##ᆸ": 994, + "##ᄇ": 995, + "##ᅧ": 996, + "##ᆼ": 997, + "##ᄋ": 998, + "##ᆫ": 999, + "##ᅢ": 1000, + "##ค": 1001, + "##ต": 1002, + "##ɡ": 1003, + "##ɒ": 1004, + "##っ": 1005, + "##ш": 1006, + "##ц": 1007, + "##ь": 1008, + "##カ": 1009, + "##ა": 1010, + "##ю": 1011, + "##ᅮ": 1012, + "##ュ": 1013, + "##ܕ": 1014, + "##ܝ": 1015, + "##ܢ": 1016, + "##ܬ": 1017, + "##ミ": 1018, + "##д": 1019, + "##ਲ": 1020, + "##ਾ": 1021, + "##ਹ": 1022, + "##ള": 1023, + "##ß": 1024, + "##ვ": 1025, + "##β": 1026, + "##θ": 1027, + "##こ": 1028, + "##ロ": 1029, + "##キ": 1030, + "##γ": 1031, + "##ア": 1032, + "##ら": 1033, + "##ウ": 1034, + "##ァ": 1035, + "##リ": 1036, + "##3": 1037, + "##ล": 1038, + "##ย": 1039, + "##า": 1040, + "##ณ": 1041, + "##ม": 1042, + "##ð": 1043, + "##δ": 1044, + "##サ": 1045, + "##し": 1046, + "##ま": 1047, + "##ნ": 1048, + "##ป": 1049, + "##ɛ": 1050, + "##せ": 1051, + "##ซ": 1052, + "##ʒ": 1053, + "##დ": 1054, + "##ল": 1055, + "##œ": 1056, + "##ω": 1057, + "##є": 1058, + "##ك": 1059, + "##เ": 1060, + "##х": 1061, + "##ɫ": 1062, + "##ჲ": 1063, + "##ɾ": 1064, + "##ʲ": 1065, + "##გ": 1066, + "##м": 1067, + "##ะ": 1068, + "##ช": 1069, + "##น": 1070, + "##ภ": 1071, + "##ด": 1072, + "##ف": 1073, + "##ც": 1074, + "##と": 1075, + "##ჱ": 1076, + "##ъ": 1077, + "##ム": 1078, + "##ェ": 1079, + "##خ": 1080, + "##ฮ": 1081, + "##თ": 1082, + "##ง": 1083, + "##ไ": 1084, + "##п": 1085, + "##ჳ": 1086, + "##ж": 1087, + "##ث": 1088, + "##ษ": 1089, + "##ฐ": 1090, + "##わ": 1091, + "##ɐ": 1092, + "##ᅡ": 1093, + "##ᆯ": 1094, + "##ャ": 1095, + "##モ": 1096, + "##ɳ": 1097, + "##ح": 1098, + "##ם": 1099, + "##を": 1100, + "##み": 1101, + "##ょ": 1102, + "##に": 1103, + "##ܠ": 1104, + "##ܗ": 1105, + "##ܐ": 1106, + "##ქ": 1107, + "##ാ": 1108, + "##ഹ": 1109, + "##ひ": 1110, + "##ש": 1111, + "##מ": 1112, + "##צ": 1113, + "##แ": 1114, + "##г": 1115, + "##も": 1116, + "##ף": 1117, + "##セ": 1118, + "##オ": 1119, + "##ר": 1120, + "##া": 1121, + "##হ": 1122, + "##ψ": 1123 + } + } +} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e8171f3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,17 @@ +{ + "cls_token": "[CLS]", + "do_basic_tokenize": true, + "do_lower_case": true, + "mask_token": "[MASK]", + "max_len": 512, + "model_max_length": 512, + "name_or_path": "temp/dummy/layoutlm/processors", + "never_split": null, + "pad_token": "[PAD]", + "sep_token": "[SEP]", + "special_tokens_map_file": "/home/huggingface/.cache/huggingface/hub/models--microsoft--layoutlm-base-uncased/snapshots/ca841ce8d2f46b13b0ac3f635b8eb7d2e1d758d5/special_tokens_map.json", + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "LayoutLMTokenizer", + "unk_token": "[UNK]" +} diff --git a/vocab.txt b/vocab.txt new file mode 100644 index 0000000..e6cfff3 --- /dev/null +++ b/vocab.txt @@ -0,0 +1,1124 @@ +[PAD] +[UNK] +[CLS] +[SEP] +[MASK] +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +| +} +~ +¡ +¢ +£ +¥ +§ +° +± +² +³ +´ +µ +· +º +½ +¿ +× +ß +æ +ð +ø +þ +đ +ħ +ı +ł +œ +ɐ +ɑ +ɒ +ɔ +ə +ɛ +ɜ +ɡ +ɢ +ɪ +ɫ +ɳ +ɽ +ɾ +ʁ +ʃ +ʊ +ʋ +ʒ +ʔ +ʕ +ʲ +ʻ +ʼ +ʾ +ʿ +ˈ +ˌ +ː +α +β +γ +δ +ε +η +θ +ι +κ +λ +μ +ν +ξ +ο +π +ρ +ς +σ +τ +υ +φ +χ +ψ +ω +а +б +в +г +д +е +ж +з +и +к +л +м +н +о +п +р +с +т +у +х +ц +ш +ъ +ы +ь +ю +я +є +א +ב +ג +ה +ו +ז +ח +י +ל +ם +מ +ן +נ +ס +ף +פ +צ +ר +ש +ת +ء +ا +ب +ة +ت +ث +ج +ح +خ +د +ذ +ر +س +ش +ص +ع +ف +ق +ك +ل +م +ن +ه +و +ي +ܐ +ܕ +ܗ +ܝ +ܠ +ܢ +ܬ +अ +ई +क +ग +ण +त +द +न +प +ब +म +य +र +ल +व +स +ह +ा +ि +আ +ল +হ +া +ਅ +ਲ +ਹ +ਾ +അ +ള +ഹ +ാ +ก +ค +ง +ช +ซ +ญ +ฐ +ณ +ด +ต +น +บ +ป +พ +ภ +ม +ย +ร +ล +ว +ศ +ษ +ส +ห +อ +ฮ +ะ +า +เ +แ +ไ +ა +ბ +გ +დ +ე +ვ +ზ +თ +ი +კ +ლ +მ +ნ +ო +პ +ჟ +რ +ს +ტ +უ +ფ +ქ +ღ +ყ +შ +ჩ +ც +ძ +წ +ჭ +ხ +ჯ +ჰ +ჱ +ჲ +ჳ +ჴ +ჵ +ჶ +ჷ +ჸ +ჹ +ჺ +჻ +ᄃ +ᄅ +ᄇ +ᄋ +ᄌ +ᅡ +ᅢ +ᅦ +ᅧ +ᅩ +ᅮ +ᅵ +ᆨ +ᆫ +ᆯ +ᆸ +ᆼ +ᵻ +‐ +‑ +– +— +― +‘ +’ +“ +” +„ +† +‡ +• +… +′ +″ +⁄ +₣ +₤ +€ +₹ +⅓ +⅔ +→ +− +≡ +≤ +① +☉ +☫ +♀ +♭ +♯ +⚳ +ⴀ +ⴂ +ⴃ +ⴈ +ⴌ +ⴕ +ⴟ +〈 +〉 +〜 +あ +い +う +お +か +き +く +け +こ +さ +し +す +せ +た +ち +っ +つ +と +な +に +の +は +ひ +ふ +ほ +ま +み +め +も +ゃ +ゆ +ょ +ら +り +る +れ +わ +を +ん +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +キ +ク +ケ +コ +サ +シ +ス +セ +タ +チ +ッ +ツ +テ +ト +ナ +ニ +ネ +ノ +ハ +フ +ヘ +マ +ミ +ム +モ +ャ +ュ +ョ +ラ +リ +ル +レ +ロ +ン +・ +ー +一 +七 +下 +世 +丙 +中 +主 +乃 +之 +乙 +九 +二 +云 +人 +今 +付 +作 +侗 +依 +信 +傳 +儚 +充 +光 +全 +兵 +其 +具 +円 +再 +出 +判 +前 +剛 +劇 +劉 +動 +化 +北 +华 +厂 +去 +古 +可 +台 +史 +同 +名 +君 +吳 +周 +命 +和 +咲 +善 +四 +國 +園 +圣 +在 +坂 +堤 +場 +塘 +夕 +大 +天 +夫 +女 +妙 +姚 +子 +孟 +守 +安 +宋 +完 +宗 +宝 +宫 +寝 +寺 +小 +少 +尾 +山 +岳 +川 +州 +巳 +市 +師 +平 +广 +庆 +府 +座 +廬 +建 +式 +張 +彌 +彩 +彼 +後 +御 +德 +思 +愛 +憑 +憶 +應 +懷 +战 +戦 +扈 +技 +拉 +拳 +挑 +揺 +攻 +放 +政 +散 +斯 +方 +日 +旦 +旭 +昌 +明 +星 +春 +晋 +景 +曦 +月 +望 +未 +本 +李 +村 +杜 +束 +来 +林 +桜 +梶 +棘 +椎 +楊 +楚 +榮 +橘 +機 +正 +殻 +殿 +母 +水 +汉 +沂 +沙 +河 +泗 +波 +泣 +洪 +淹 +清 +湯 +漢 +澄 +澤 +火 +灯 +灵 +灼 +焼 +熱 +物 +狐 +狸 +玄 +王 +玩 +珂 +珙 +球 +理 +琦 +琪 +瓊 +生 +田 +畢 +番 +瘡 +白 +皮 +真 +砲 +礮 +祈 +神 +祠 +秋 +空 +立 +精 +約 +絵 +織 +義 +翠 +者 +耕 +肖 +胡 +膀 +臂 +興 +良 +花 +芳 +芽 +若 +英 +藕 +藥 +蘄 +蘇 +行 +裁 +規 +覺 +观 +解 +記 +誓 +誡 +誰 +謎 +许 +谭 +豪 +豫 +費 +贵 +赤 +趙 +足 +跡 +転 +辛 +逆 +遇 +運 +過 +遠 +選 +邦 +邱 +部 +郭 +都 +酈 +里 +野 +金 +銃 +鋼 +錄 +錡 +鍵 +鐵 +钱 +铁 +關 +防 +阿 +陈 +陳 +陽 +隊 +階 +集 +雪 +雲 +霖 +霹 +靂 +韓 +願 +顯 +颜 +马 +高 +龍 +ﷲ +ﻋ +/ +3 +~ +##w +##i +##s +##e +##n +##r +##a +##l +##u +##k +##m +##o +##g +##p +##t +##v +##y +##d +##c +##h +##f +##b +##4 +##j +##の +##z +##1 +##5 +##6 +##0 +##7 +##9 +##q +##ノ +##ー +##ɽ +##3 +##2 +##8 +##x +##ن +##ا +##ء +##ɑ +##ː +##ə +##უ +##ს +##ხ +##რ +##ი +##お +##り +##ل +##ع +##ة +##ォ +##ル +##ト +##ゥ +##ナ +##ł +##ʔ +##ɪ +##ʁ +##ʻ +##อ +##ก +##ว +##ร +##ス +##テ +##ィ +##ニ +##ᵻ +##ˈ +##ʊ +##व +##ा +##ह +##ि +##क +##ʃ +##い +##α +##ν +##χ +##ε +##ρ +##η +##ς +##は +##ه +##ø +##ɜ +##о +##в +##е +##т +##с +##к +##а +##я +##ب +##ص +##ξ +##エ +##タ +##イ +##р +##и +##н +##な +##れ +##る +##л +##у +##ق +##د +##س +##ण +##स +##न +##ו +##פ +##ה +##λ +##τ +##ذ +##ي +##う +##き +##つ +##ο +##υ +##ო +##く +##ı +##レ +##ッ +##チ +##ल +##ב +##ת +##א +##י +##з +##ˌ +##æ +##ʿ +##ई +##ы +##π +##ι +##κ +##พ +##م +##ت +##र +##द +##ラ +##あ +##ე +##ლ +##მ +##წ +##ფ +##ל +##σ +##ן +##ɔ +##ش +##و +##ゆ +##ग +##ち +##ゃ +##ん +##フ +##ס +##ツ +##ネ +##ク +##シ +##ョ +##ン +##す +##め +##ტ +##º +##か +##た +##đ +##マ +##נ +##ハ +##μ +##ر +##ᅦ +##ᄃ +##ᅩ +##ᆨ +##ᄅ +##ᅵ +##ᆸ +##ᄇ +##ᅧ +##ᆼ +##ᄋ +##ᆫ +##ᅢ +##ค +##ต +##ɡ +##ɒ +##っ +##ш +##ц +##ь +##カ +##ა +##ю +##ᅮ +##ュ +##ܕ +##ܝ +##ܢ +##ܬ +##ミ +##д +##ਲ +##ਾ +##ਹ +##ള +##ß +##ვ +##β +##θ +##こ +##ロ +##キ +##γ +##ア +##ら +##ウ +##ァ +##リ +##3 +##ล +##ย +##า +##ณ +##ม +##ð +##δ +##サ +##し +##ま +##ნ +##ป +##ɛ +##せ +##ซ +##ʒ +##დ +##ল +##œ +##ω +##є +##ك +##เ +##х +##ɫ +##ჲ +##ɾ +##ʲ +##გ +##м +##ะ +##ช +##น +##ภ +##ด +##ف +##ც +##と +##ჱ +##ъ +##ム +##ェ +##خ +##ฮ +##თ +##ง +##ไ +##п +##ჳ +##ж +##ث +##ษ +##ฐ +##わ +##ɐ +##ᅡ +##ᆯ +##ャ +##モ +##ɳ +##ح +##ם +##を +##み +##ょ +##に +##ܠ +##ܗ +##ܐ +##ქ +##ാ +##ഹ +##ひ +##ש +##מ +##צ +##แ +##г +##も +##ף +##セ +##オ +##ר +##া +##হ +##ψ