blenderbot-400M-distill/vocab.json

1 line
124 KiB
JSON
Raw Normal View History

2020-12-21 18:03:18 +08:00
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "<s_AGAIN>": 4, "<pad_AGAIN>": 5, "</s_AGAIN>": 6, "<unk_AGAIN>": 7, "!": 8, "\"": 9, "#": 10, "$": 11, "%": 12, "&": 13, "'": 14, "(": 15, ")": 16, "*": 17, "+": 18, ",": 19, "-": 20, ".": 21, "/": 22, "0": 23, "1": 24, "2": 25, "3": 26, "4": 27, "5": 28, "6": 29, "7": 30, "8": 31, "9": 32, ":": 33, ";": 34, "<": 35, "=": 36, ">": 37, "?": 38, "@": 39, "A": 40, "B": 41, "C": 42, "D": 43, "E": 44, "F": 45, "G": 46, "H": 47, "I": 48, "J": 49, "K": 50, "L": 51, "M": 52, "N": 53, "O": 54, "P": 55, "Q": 56, "R": 57, "S": 58, "T": 59, "U": 60, "V": 61, "W": 62, "X": 63, "Y": 64, "Z": 65, "[": 66, "\\": 67, "]": 68, "^": 69, "_": 70, "`": 71, "a": 72, "b": 73, "c": 74, "d": 75, "e": 76, "f": 77, "g": 78, "h": 79, "i": 80, "j": 81, "k": 82, "l": 83, "m": 84, "n": 85, "o": 86, "p": 87, "q": 88, "r": 89, "s": 90, "t": 91, "u": 92, "v": 93, "w": 94, "x": 95, "y": 96, "z": 97, "{": 98, "|": 99, "}": 100, "~": 101, "¡": 102, "¢": 103, "£": 104, "¤": 105, "¥": 106, "¦": 107, "§": 108, "¨": 109, "©": 110, "ª": 111, "«": 112, "¬": 113, "®": 114, "¯": 115, "°": 116, "±": 117, "²": 118, "³": 119, "´": 120, "µ": 121, "¶": 122, "·": 123, "¸": 124, "¹": 125, "º": 126, "»": 127, "¼": 128, "½": 129, "¾": 130, "¿": 131, "À": 132, "Á": 133, "Â": 134, "Ã": 135, "Ä": 136, "Å": 137, "Æ": 138, "Ç": 139, "È": 140, "É": 141, "Ê": 142, "Ë": 143, "Ì": 144, "Í": 145, "Î": 146, "Ï": 147, "Ð": 148, "Ñ": 149, "Ò": 150, "Ó": 151, "Ô": 152, "Õ": 153, "Ö": 154, "×": 155, "Ø": 156, "Ù": 157, "Ú": 158, "Û": 159, "Ü": 160, "Ý": 161, "Þ": 162, "ß": 163, "à": 164, "á": 165, "â": 166, "ã": 167, "ä": 168, "å": 169, "æ": 170, "ç": 171, "è": 172, "é": 173, "ê": 174, "ë": 175, "ì": 176, "í": 177, "î": 178, "ï": 179, "ð": 180, "ñ": 181, "ò": 182, "ó": 183, "ô": 184, "õ": 185, "ö": 186, "÷": 187, "ø": 188, "ù": 189, "ú": 190, "û": 191, "ü": 192, "ý": 193, "þ": 194, "ÿ": 195, "Ā": 196, "ā": 197, "Ă": 198, "ă": 199, "Ą": 200, "ą": 201, "Ć": 202, "ć": 203, "Ĉ": 204, "ĉ": 205, "Ċ": 206, "ċ": 207, "Č": 208, "č": 209, "Ď": 210, "ď": 211, "Đ": 212, "đ": 213, "Ē": 214, "ē": 215, "Ĕ": 216, "ĕ": 217, "Ė": 218, "ė": 219, "Ę": 220, "ę": 221, "Ě": 222, "ě": 223, "Ĝ": 224, "ĝ": 225, "Ğ": 226, "ğ": 227, "Ġ": 228, "ġ": 229, "Ģ": 230, "ģ": 231, "Ĥ": 232, "ĥ": 233, "Ħ": 234, "ħ": 235, "Ĩ": 236, "ĩ": 237, "Ī": 238, "ī": 239, "Ĭ": 240, "ĭ": 241, "Į": 242, "į": 243, "İ": 244, "ı": 245, "IJ": 246, "ij": 247, "Ĵ": 248, "ĵ": 249, "Ķ": 250, "ķ": 251, "ĸ": 252, "Ĺ": 253, "ĺ": 254, "Ļ": 255, "ļ": 256, "Ľ": 257, "ľ": 258, "Ŀ": 259, "ŀ": 260, "Ł": 261, "ł": 262, "Ń": 263, "Ġt": 264, "Ġa": 265, "he": 266, "in": 267, "Ġs": 268, "re": 269, "Ġw": 270, "Ġthe": 271, "ou": 272, "on": 273, "ha": 274, "Ġb": 275, "er": 276, "it": 277, "ing": 278, "Ġm": 279, "Ġc": 280, "ĠI": 281, "is": 282, "nd": 283, "Ġf": 284, "or": 285, "Ġp": 286, "Ġto": 287, "Ġd": 288, "ll": 289, "es": 290, "Ġo": 291, "en": 292, "Ġl": 293, "Ġg": 294, "an": 295, "ed": 296, "Ġy": 297, "Ġand": 298, "hat": 299, "st": 300, "Ġn": 301, "Ġin": 302, "at": 303, "Ġyou": 304, "ar": 305, "Ġof": 306, "om": 307, "le": 308, "ve": 309, "Ġbe": 310, "ot": 311, "Ġit": 312, "Ġha": 313, "Ġe": 314, "Ġis": 315, "Ġth": 316, "ic": 317, "as": 318, "Ġthat": 319, "ow": 320, "ay": 321, "al": 322, "et": 323, "Ġon": 324, "Ġre": 325, "Ġh": 326, "ĠT": 327, "ly": 328, "ut": 329, "se": 330, "ld": 331, "Ġu": 332, "id": 333, "ke": 334, "Ġfor": 335, "ver": 336, "ion": 337, "Ġhe": 338, "ac": 339, "im": 340, "'s": 341, "'t": 342, "am": 343, "all": 344, "gh": 345, "Ġst": 346, "ent": 347, "out": 348, "ĠA": 349, "ro": 350, "Ġan": 351, "ad": 352, "ĠS": 353, "ith": 354, "if": 355, "ould": 356, "oo": 357, "Ġwith": 358, "Ġwe": 359, "Ġhave": 360, "Ġdo": 361, "ill": 362, "ir": 363, "Ġj": 364, "ally": 365, "Ġare": 366, "ust": 367, "Ġnot": 368, "Ġli": 369, "ĠW": 370, "Ġas": 371, "Ġwas": 372, "Ġbut": 373, "op": 374, "Ġr": 375, "..": 376