From b965c5deee645e96dcc40a8cdd260a7595b93354 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 23 Feb 2022 11:44:13 +0100 Subject: [PATCH] Keeping the vocab to 99. --- merges.txt | 646 ---------------- tokenizer.json | 1904 +++++------------------------------------------- vocab.json | 96 ++- 3 files changed, 267 insertions(+), 2379 deletions(-) diff --git a/merges.txt b/merges.txt index b7498ce..0809d44 100644 --- a/merges.txt +++ b/merges.txt @@ -1,647 +1 @@ #version: 0.2 - Trained by `huggingface/tokenizers` -Ġ t -Ġt h -Ġ a -Ġth e -i n -Ġ o -Ġ , -Ġ s -e d -Ġ w -e r -Ġ . -Ġ i -r e -Ġ c -n d -Ġ f -Ġ b -a t -Ġo f -e r -e n -a r -o r -i t -Ġ p -Ġ h -Ġa nd -o n -in g -a n -r o -Ġ m -Ġ d -e s -Ġi n -o n -Ġt o -o u -i s -Ġ a -i c -Ġ T -a l -Ġ l -Ġ = -Ġ re -Ġ " -e s -Ġ S -a s -a l -i l -e l -i on -Ġ A -Ġ C -Ġ 1 -Ġ Ċ -u r -ĠT h -Ġ n -a s -Ġ @ -e c -o m -a c -Ġ e -Ġw as -Ġ M -o r -a n -a m -e n -o l -Ġ in -Ġ g -Ġ ' -Ġ B -l y -a t -i v -t s -ĠTh e -u s -- @ -Ġ@ -@ -i s -Ġ I -Ġw h -i g -Ġ H -Ġs t -o s -u n -t h -Ġ P -Ġw it -Ġth at -i r -Ġa s -e m -Ġo n -r a -Ġf or -Ġ R -e t -o w -Ġ 2 -i d -Ġ D -l e -Ġwit h -l a -en t -i m -Ġ F -e a -i on -Ġb y -Ġ ) -Ġ ( -Ġa l -Ġc on -en t -Ġ W -Ġi s -er e -Ġ G -Ġ N -Ġ L -Ġh a -er s -r i -t h -t ed -u c -Ġ J -Ġ1 9 -e v -u l -Ġ v -c e -at ion -ro m -Ġb e -Ġ E -i n -Ġth e -Ġf rom -Ġ O -t er -Ġp ro -Ġa r -a d -Ġc om -i c -a g -Ġh is -Ġs h -Ġa t -o v -i es -o o -p p -s t -c h -Ġ r -Ġ2 0 -a y -i f -Ġw ere -Ġc h -u t -s t -u t -d s -o p -u m -Ġi t -o c -t er -l e -ig h -u d -Ġe x -ion s -at e -it y -at ed -Ġ un -e p -q u -Ġn o -Ġ K -iv e -is t -Ġo n -am e -ou n -i r -a b -Ġ â -in g -Ġh e -l d -u g -ic h -Ġa n -e d -Ġ k -Ġâ Ģ -Ġha d -v e -a in -Ġs e -t ion -or e -re s -Ġwh ich -ĠI n -o d -th er -a k -Ġs p -a r -Ġ y -ĠC h -on g -Ġa c -es t -Ġ U -a p -f f -al ly -r it -ĠS t -u b -g e -b er -e t -Ġb e -e ar -Ġre c -er s -Ġf ir -o t -Ġar e -Ġa n -c h -o g -i a -es t -in e -il l -an d -e l -ar y -e w -i d -Ġf or -Ġ ; -Ġcom p -Ġ V -Ġin c -t r -Ġ20 0 -Ġthe ir -u s -Ġb ut -r an -ic al -Ġfir st -Ġd e -Ġin t -Ġ ro -s o -ĠâĢ ĵ -Ġno t -d ing -f ter -ur e -Ġp ar -Ġ : -i an -Ġt w -ou ld -Ġal so -Ġi ts -Ġw or -u m -Ġo r -os t -0 0 -ou r -ar d -Ġre s -m p -u e -Ġa b -is h -Ġcon t -Ġa d -ow n -al l -ou g -Ġh er -as t -Ġ en -om e -al l -d ed -o w -Ġha ve -Ġ us -ea r -ac k -d uc -i al -s s -en ts -a in -t ing -Ġon e -es s -Ġh as -igh t -a v -Ġe v -ou t -a y -en ce -Ġbe en -e w -Ġtw o -Ġc l -d er -im e -k s -es s -is h -. @ -Ġ@ .@ -Ġp la -Ġp l -Ġo r -u p -m ent -ur ing -ol l -ĠI n -Ġth is -Ġb ec -Ġcom m -Ġd is -at er -ag e -Ġa pp -ou s -e y -i l -p er -ĠA l -ion al -l ud -el y -t t -il e -i z -Ġ j -Ġwh o -Ġa g -i b -Ġthe y -f or -Ġo v -at h -e g -Ġs c -i p -Ġ20 1 -Ġ 3 -Ġp er -or y -Ġd es -id e -Ġs er -s e -ĠH e -la nd -at ions -r ic -i t -re s -er ed -Ġp re -ĠS h -an ce -or t -an t -, @ -Ġ@ ,@ -el l -Ġ Y -n ed -el l -it e -Ġinc lud -Ġre p -Ġa fter -Ġs uc -re e -an y -i m -or t -Ġ1 8 -Ġs u -ad e -ou r -ĠU n -ĠI t -i k -ĠM ar -em ber -Ġ 1 -e en -a nd -Ġs ec -ic e -Ġt ime -ĠA n -Ġint o -Ġf in -Ġo ther -Ġa tt -il l -re n -ac h -as s -er al -es e -s h -al s -it ion -oug h -l es -am p -Ġw ould -Ġm ore -ro ug -ri b -er y -ac e -Ġ A -Ġpla y -it ed -k ed -is t -i ed -Ġ 2 -as ed -ing s -an g -a m -i p -Ġb o -ab le -t y -Ġch ar -Ġc ent -et w -at es -ro p -Ġ I -u nd -ĠA m -c es -o in -Ġin ter -u p -c t -on e -Ġt ra -an t -ec t -Ġal l -e f -Ġcon s -ub l -n ing -an s -Ġf e -us t -Ġ 0 -Ġre m -as e -on g -Ġwh en -e b -ĠW h -Ġe ar -ev er -Ġov er -Ġk n -a us -Ġp os -a d -er m -Ġsh e -Ġ ra -Ġd uring -as on -v i -Ġex p -Ġl ea -Ġ el -Ġ 4 -Ġon ly -o nd -Ġd ec -Ġac c -Ġo ff -is s -Ġf l -ĠE n -o t -en s -os e -ak e -o m -Ġs ev -ac h -etw een -er n -Ġ 3 -Ġp r -Ġg ro -r uc -Ġd i -Ġ19 9 -ĠA r -Ġg ame -Ġh im -oo k -Ġ up -Ġab out -Ġre l -for m -Ġth ree -at t -ĠC om -Ġs a -ear s -Ġ 5 -r y -Ġi mp -Ġm ost -f er -Ġp res -Ġf il -Ġb etween -Ġbe g -p h -or s -Ġth an -Ġrec or -o b -er ic -at ing -Ġth roug -k ing -Ġo ut -Ġn um -oo d -oll ow -ac t -u il -Ġc re -ol og -at ional -Ġpro duc -Ġwh ile -Ġl ater -Ġw rit -e x -Ġst ar -Ġsp ec -e e -ish ed -Ġre g -is ion -ou th -Ġre le -Ġa ss -Ġse ason -Ġm ade -il y -r u -o y -t ur -t e -Ġ qu -Ġm ov -ur y -ĠAm eric -em ent -c c -ou nd -Ġl ar -Ġfor m -ec t -Ġde f -Ġm us -ĠP ar -Ġm e -Ġs ub -w ay -o p -o h -el d -i e -em p -am es -er n -Ġn or -iv ed -ev el -Ġsuc h -ar ds -Ġin d -ik e -Ġg en -er t -Ġy ear -Ġus ed -Ġn ew -Ġ 5 -Ġal b -s p -y p -Ġwit h -Ġwh ere -ic s -ĠTh is -Ġthe m -w n diff --git a/tokenizer.json b/tokenizer.json index 760bdbc..e14344a 100644 --- a/tokenizer.json +++ b/tokenizer.json @@ -1,1737 +1,177 @@ { - "version": "1.0", - "truncation": null, - "padding": null, - "added_tokens": [ - { - "id": 0, - "special": true, - "content": "<|startoftext|>", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": true + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "<|startoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true + }, + { + "id": 1, + "special": true, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + } + ], + "normalizer": { + "type": "Sequence", + "normalizers": [ + { + "type": "NFC" + }, + { + "type": "Replace", + "pattern": { + "Regex": "\\s+" + }, + "content": " " + }, + { + "type": "Lowercase" + } + ] }, - { - "id": 1, - "special": true, - "content": "<|endoftext|>", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false - } - ], - "normalizer": { - "type": "Sequence", - "normalizers": [ - { - "type": "NFC" - }, - { - "type": "Replace", - "pattern": { - "Regex": "\\s+" - }, - "content": " " - }, - { - "type": "Lowercase" - } - ] - }, - "pre_tokenizer": { - "type": "Sequence", - "pretokenizers": [ - { - "type": "Split", - "pattern": { - "Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+" - }, - "behavior": "Removed", - "invert": true - }, - { + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+" + }, + "behavior": "Removed", + "invert": true + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true + } + ] + }, + "post_processor": { + "type": "RobertaProcessing", + "sep": ["<|endoftext|>", 1], + "cls": ["<|startoftext|>", 0], + "trim_offsets": false, + "add_prefix_space": false + }, + "decoder": { "type": "ByteLevel", - "add_prefix_space": false, + "add_prefix_space": true, "trim_offsets": true - } - ] - }, - "post_processor": { - "type": "RobertaProcessing", - "sep": [ - "<|endoftext|>", - 1 - ], - "cls": [ - "<|startoftext|>", - 0 - ], - "trim_offsets": false, - "add_prefix_space": false - }, - "decoder": { - "type": "ByteLevel", - "add_prefix_space": true, - "trim_offsets": true - }, - "model": { - "type": "BPE", - "dropout": null, - "unk_token": "<|endoftext|>", - "continuing_subword_prefix": "", - "end_of_word_suffix": "", - "fuse_unk": false, - "vocab": { - "<|startoftext|>": 0, - "<|endoftext|>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "|": 92, - "}": 93, - "~": 94, - "¡": 95, - "¢": 96, - "£": 97, - "¤": 98, - "¥": 99, - "¦": 100, - "§": 101, - "¨": 102, - "©": 103, - "ª": 104, - "«": 105, - "¬": 106, - "®": 107, - "¯": 108, - "°": 109, - "±": 110, - "²": 111, - "³": 112, - "´": 113, - "µ": 114, - "¶": 115, - "·": 116, - "¸": 117, - "¹": 118, - "º": 119, - "»": 120, - "¼": 121, - "½": 122, - "¾": 123, - "¿": 124, - "Â": 125, - "Ã": 126, - "Ä": 127, - "Å": 128, - "Æ": 129, - "Ç": 130, - "È": 131, - "É": 132, - "Ê": 133, - "Ë": 134, - "Ì": 135, - "Í": 136, - "Î": 137, - "Ï": 138, - "Ð": 139, - "Ñ": 140, - "Ö": 141, - "×": 142, - "Ø": 143, - "Ù": 144, - "Ü": 145, - "à": 146, - "á": 147, - "â": 148, - "ã": 149, - "ä": 150, - "å": 151, - "æ": 152, - "ç": 153, - "è": 154, - "é": 155, - "ë": 156, - "ì": 157, - "ï": 158, - "Ċ": 159, - "Ġ": 160, - "Ģ": 161, - "ģ": 162, - "Ĥ": 163, - "ĥ": 164, - "Ħ": 165, - "ħ": 166, - "Ĩ": 167, - "ĩ": 168, - "Ī": 169, - "ī": 170, - "Ĭ": 171, - "ĭ": 172, - "Į": 173, - "į": 174, - "İ": 175, - "ı": 176, - "IJ": 177, - "ij": 178, - "Ĵ": 179, - "ĵ": 180, - "Ķ": 181, - "ķ": 182, - "ĸ": 183, - "Ĺ": 184, - "ĺ": 185, - "Ļ": 186, - "ļ": 187, - "Ľ": 188, - "ľ": 189, - "Ŀ": 190, - "ŀ": 191, - "Ł": 192, - "ł": 193, - "Ń": 194, - "e": 195, - "d": 196, - "a": 197, - "o": 198, - "n": 199, - "±": 200, - "l": 201, - "m": 202, - "h": 203, - "r": 204, - "i": 205, - "s": 206, - "Z": 207, - "t": 208, - "f": 209, - "k": 210, - "y": 211, - "b": 212, - "F": 213, - "g": 214, - "7": 215, - "0": 216, - "p": 217, - "L": 218, - "H": 219, - "¡": 220, - "Ī": 221, - "1": 222, - "Ģ": 223, - "c": 224, - "ĩ": 225, - "6": 226, - "A": 227, - "z": 228, - "u": 229, - "S": 230, - "2": 231, - "v": 232, - "4": 233, - "M": 234, - "T": 235, - "8": 236, - "I": 237, - "N": 238, - "C": 239, - "5": 240, - "¹": 241, - "9": 242, - "3": 243, - "ī": 244, - "P": 245, - "E": 246, - "»": 247, - "V": 248, - "İ": 249, - "w": 250, - "J": 251, - "ł": 252, - ".": 253, - "K": 254, - "D": 255, - "Ķ": 256, - "¸": 257, - "B": 258, - "©": 259, - "º": 260, - "µ": 261, - "Ĥ": 262, - "X": 263, - "R": 264, - "O": 265, - "«": 266, - "Ļ": 267, - "U": 268, - "x": 269, - "[": 270, - "¿": 271, - "³": 272, - "ģ": 273, - "W": 274, - "§": 275, - "-": 276, - "ĸ": 277, - "Ħ": 278, - ",": 279, - "q": 280, - "ħ": 281, - "¨": 282, - "G": 283, - "²": 284, - "ĺ": 285, - "ª": 286, - "¯": 287, - "j": 288, - "]": 289, - "ļ": 290, - "Ŀ": 291, - "¤": 292, - "ŀ": 293, - "½": 294, - "IJ": 295, - "'": 296, - "Ń": 297, - "°": 298, - "ľ": 299, - ">": 300, - "¶": 301, - "į": 302, - "¦": 303, - "|": 304, - "¼": 305, - "¢": 306, - "´": 307, - "Ĩ": 308, - "Q": 309, - "Y": 310, - "Ľ": 311, - "ĵ": 312, - "ij": 313, - "ķ": 314, - "Ĭ": 315, - "¾": 316, - ";": 317, - "(": 318, - "¬": 319, - "@": 320, - "ĭ": 321, - "Ĺ": 322, - "£": 323, - "Į": 324, - "#": 325, - "·": 326, - "*": 327, - "Ĵ": 328, - "®": 329, - ")": 330, - "^": 331, - "ı": 332, - "Ġ": 333, - "_": 334, - "Ł": 335, - "}": 336, - "ĥ": 337, - "\\": 338, - "¥": 339, - "<": 340, - "+": 341, - "=": 342, - "~": 343, - "\"": 344, - "!": 345, - "?": 346, - "`": 347, - "$": 348, - "Ċ": 349, - "/": 350, - "%": 351, - "&": 352, - ":": 353, - "Ġt": 354, - "Ġth": 355, - "Ġa": 356, - "Ġthe": 357, - "in": 358, - "Ġo": 359, - "Ġ,": 360, - "Ġs": 361, - "ed": 362, - "Ġw": 363, - "er": 364, - "Ġ.": 365, - "Ġi": 366, - "re": 367, - "Ġc": 368, - "nd": 369, - "Ġf": 370, - "Ġb": 371, - "at": 372, - "Ġof": 373, - "er": 374, - "en": 375, - "ar": 376, - "or": 377, - "it": 378, - "Ġp": 379, - "Ġh": 380, - "Ġand": 381, - "on": 382, - "ing": 383, - "an": 384, - "ro": 385, - "Ġm": 386, - "Ġd": 387, - "es": 388, - "Ġin": 389, - "on": 390, - "Ġto": 391, - "ou": 392, - "is": 393, - "Ġa": 394, - "ic": 395, - "ĠT": 396, - "al": 397, - "Ġl": 398, - "Ġ=": 399, - "Ġre": 400, - "Ġ\"": 401, - "es": 402, - "ĠS": 403, - "as": 404, - "al": 405, - "il": 406, - "el": 407, - "ion": 408, - "ĠA": 409, - "ĠC": 410, - "Ġ1": 411, - "ĠĊ": 412, - "ur": 413, - "ĠTh": 414, - "Ġn": 415, - "as": 416, - "Ġ@": 417, - "ec": 418, - "om": 419, - "ac": 420, - "Ġe": 421, - "Ġwas": 422, - "ĠM": 423, - "or": 424, - "an": 425, - "am": 426, - "en": 427, - "ol": 428, - "Ġin": 429, - "Ġg": 430, - "Ġ'": 431, - "ĠB": 432, - "ly": 433, - "at": 434, - "iv": 435, - "ts": 436, - "ĠThe": 437, - "us": 438, - "-@": 439, - "Ġ@-@": 440, - "is": 441, - "ĠI": 442, - "Ġwh": 443, - "ig": 444, - "ĠH": 445, - "Ġst": 446, - "os": 447, - "un": 448, - "th": 449, - "ĠP": 450, - "Ġwit": 451, - "Ġthat": 452, - "ir": 453, - "Ġas": 454, - "em": 455, - "Ġon": 456, - "ra": 457, - "Ġfor": 458, - "ĠR": 459, - "et": 460, - "ow": 461, - "Ġ2": 462, - "id": 463, - "ĠD": 464, - "le": 465, - "Ġwith": 466, - "la": 467, - "ent": 468, - "im": 469, - "ĠF": 470, - "ea": 471, - "ion": 472, - "Ġby": 473, - "Ġ)": 474, - "Ġ(": 475, - "Ġal": 476, - "Ġcon": 477, - "ent": 478, - "ĠW": 479, - "Ġis": 480, - "ere": 481, - "ĠG": 482, - "ĠN": 483, - "ĠL": 484, - "Ġha": 485, - "ers": 486, - "ri": 487, - "th": 488, - "ted": 489, - "uc": 490, - "ĠJ": 491, - "Ġ19": 492, - "ev": 493, - "ul": 494, - "Ġv": 495, - "ce": 496, - "ation": 497, - "rom": 498, - "Ġbe": 499, - "ĠE": 500, - "in": 501, - "Ġthe": 502, - "Ġfrom": 503, - "ĠO": 504, - "ter": 505, - "Ġpro": 506, - "Ġar": 507, - "ad": 508, - "Ġcom": 509, - "ic": 510, - "ag": 511, - "Ġhis": 512, - "Ġsh": 513, - "Ġat": 514, - "ov": 515, - "ies": 516, - "oo": 517, - "pp": 518, - "st": 519, - "ch": 520, - "Ġr": 521, - "Ġ20": 522, - "ay": 523, - "if": 524, - "Ġwere": 525, - "Ġch": 526, - "ut": 527, - "st": 528, - "ut": 529, - "ds": 530, - "op": 531, - "um": 532, - "Ġit": 533, - "oc": 534, - "ter": 535, - "le": 536, - "igh": 537, - "ud": 538, - "Ġex": 539, - "ions": 540, - "ate": 541, - "ity": 542, - "ated": 543, - "Ġun": 544, - "ep": 545, - "qu": 546, - "Ġno": 547, - "ĠK": 548, - "ive": 549, - "ist": 550, - "Ġon": 551, - "ame": 552, - "oun": 553, - "ir": 554, - "ab": 555, - "Ġâ": 556, - "ing": 557, - "Ġhe": 558, - "ld": 559, - "ug": 560, - "ich": 561, - "Ġan": 562, - "ed": 563, - "Ġk": 564, - "ĠâĢ": 565, - "Ġhad": 566, - "ve": 567, - "ain": 568, - "Ġse": 569, - "tion": 570, - "ore": 571, - "res": 572, - "Ġwhich": 573, - "ĠIn": 574, - "od": 575, - "ther": 576, - "ak": 577, - "Ġsp": 578, - "ar": 579, - "Ġy": 580, - "ĠCh": 581, - "ong": 582, - "Ġac": 583, - "est": 584, - "ĠU": 585, - "ap": 586, - "ff": 587, - "ally": 588, - "rit": 589, - "ĠSt": 590, - "ub": 591, - "ge": 592, - "ber": 593, - "et": 594, - "Ġbe": 595, - "ear": 596, - "Ġrec": 597, - "ers": 598, - "Ġfir": 599, - "ot": 600, - "Ġare": 601, - "Ġan": 602, - "ch": 603, - "og": 604, - "ia": 605, - "est": 606, - "ine": 607, - "ill": 608, - "and": 609, - "el": 610, - "ary": 611, - "ew": 612, - "id": 613, - "Ġfor": 614, - "Ġ;": 615, - "Ġcomp": 616, - "ĠV": 617, - "Ġinc": 618, - "tr": 619, - "Ġ200": 620, - "Ġtheir": 621, - "us": 622, - "Ġbut": 623, - "ran": 624, - "ical": 625, - "Ġfirst": 626, - "Ġde": 627, - "Ġint": 628, - "Ġro": 629, - "so": 630, - "ĠâĢĵ": 631, - "Ġnot": 632, - "ding": 633, - "fter": 634, - "ure": 635, - "Ġpar": 636, - "Ġ:": 637, - "ian": 638, - "Ġtw": 639, - "ould": 640, - "Ġalso": 641, - "Ġits": 642, - "Ġwor": 643, - "um": 644, - "Ġor": 645, - "ost": 646, - "00": 647, - "our": 648, - "ard": 649, - "Ġres": 650, - "mp": 651, - "ue": 652, - "Ġab": 653, - "ish": 654, - "Ġcont": 655, - "Ġad": 656, - "own": 657, - "all": 658, - "oug": 659, - "Ġher": 660, - "ast": 661, - "Ġen": 662, - "ome": 663, - "all": 664, - "ded": 665, - "ow": 666, - "Ġhave": 667, - "Ġus": 668, - "ear": 669, - "ack": 670, - "duc": 671, - "ial": 672, - "ss": 673, - "ents": 674, - "ain": 675, - "ting": 676, - "Ġone": 677, - "ess": 678, - "Ġhas": 679, - "ight": 680, - "av": 681, - "Ġev": 682, - "out": 683, - "ay": 684, - "ence": 685, - "Ġbeen": 686, - "ew": 687, - "Ġtwo": 688, - "Ġcl": 689, - "der": 690, - "ime": 691, - "ks": 692, - "ess": 693, - "ish": 694, - ".@": 695, - "Ġ@.@": 696, - "Ġpla": 697, - "Ġpl": 698, - "Ġor": 699, - "up": 700, - "ment": 701, - "uring": 702, - "oll": 703, - "ĠIn": 704, - "Ġthis": 705, - "Ġbec": 706, - "Ġcomm": 707, - "Ġdis": 708, - "ater": 709, - "age": 710, - "Ġapp": 711, - "ous": 712, - "ey": 713, - "il": 714, - "per": 715, - "ĠAl": 716, - "ional": 717, - "lud": 718, - "ely": 719, - "tt": 720, - "ile": 721, - "iz": 722, - "Ġj": 723, - "Ġwho": 724, - "Ġag": 725, - "ib": 726, - "Ġthey": 727, - "for": 728, - "Ġov": 729, - "ath": 730, - "eg": 731, - "Ġsc": 732, - "ip": 733, - "Ġ201": 734, - "Ġ3": 735, - "Ġper": 736, - "ory": 737, - "Ġdes": 738, - "ide": 739, - "Ġser": 740, - "se": 741, - "ĠHe": 742, - "land": 743, - "ations": 744, - "ric": 745, - "it": 746, - "res": 747, - "ered": 748, - "Ġpre": 749, - "ĠSh": 750, - "ance": 751, - "ort": 752, - "ant": 753, - ",@": 754, - "Ġ@,@": 755, - "ell": 756, - "ĠY": 757, - "ned": 758, - "ell": 759, - "ite": 760, - "Ġinclud": 761, - "Ġrep": 762, - "Ġafter": 763, - "Ġsuc": 764, - "ree": 765, - "any": 766, - "im": 767, - "ort": 768, - "Ġ18": 769, - "Ġsu": 770, - "ade": 771, - "our": 772, - "ĠUn": 773, - "ĠIt": 774, - "ik": 775, - "ĠMar": 776, - "ember": 777, - "Ġ1": 778, - "een": 779, - "and": 780, - "Ġsec": 781, - "ice": 782, - "Ġtime": 783, - "ĠAn": 784, - "Ġinto": 785, - "Ġfin": 786, - "Ġother": 787, - "Ġatt": 788, - "ill": 789, - "ren": 790, - "ach": 791, - "ass": 792, - "eral": 793, - "ese": 794, - "sh": 795, - "als": 796, - "ition": 797, - "ough": 798, - "les": 799, - "amp": 800, - "Ġwould": 801, - "Ġmore": 802, - "roug": 803, - "rib": 804, - "ery": 805, - "ace": 806, - "ĠA": 807, - "Ġplay": 808, - "ited": 809, - "ked": 810, - "ist": 811, - "ied": 812, - "Ġ2": 813, - "ased": 814, - "ings": 815, - "ang": 816, - "am": 817, - "ip": 818, - "Ġbo": 819, - "able": 820, - "ty": 821, - "Ġchar": 822, - "Ġcent": 823, - "etw": 824, - "ates": 825, - "rop": 826, - "ĠI": 827, - "und": 828, - "ĠAm": 829, - "ces": 830, - "oin": 831, - "Ġinter": 832, - "up": 833, - "ct": 834, - "one": 835, - "Ġtra": 836, - "ant": 837, - "ect": 838, - "Ġall": 839, - "ef": 840, - "Ġcons": 841, - "ubl": 842, - "ning": 843, - "ans": 844, - "Ġfe": 845, - "ust": 846, - "Ġ0": 847, - "Ġrem": 848, - "ase": 849, - "ong": 850, - "Ġwhen": 851, - "eb": 852, - "ĠWh": 853, - "Ġear": 854, - "ever": 855, - "Ġover": 856, - "Ġkn": 857, - "aus": 858, - "Ġpos": 859, - "ad": 860, - "erm": 861, - "Ġshe": 862, - "Ġra": 863, - "Ġduring": 864, - "ason": 865, - "vi": 866, - "Ġexp": 867, - "Ġlea": 868, - "Ġel": 869, - "Ġ4": 870, - "Ġonly": 871, - "ond": 872, - "Ġdec": 873, - "Ġacc": 874, - "Ġoff": 875, - "iss": 876, - "Ġfl": 877, - "ĠEn": 878, - "ot": 879, - "ens": 880, - "ose": 881, - "ake": 882, - "om": 883, - "Ġsev": 884, - "ach": 885, - "etween": 886, - "ern": 887, - "Ġ3": 888, - "Ġpr": 889, - "Ġgro": 890, - "ruc": 891, - "Ġdi": 892, - "Ġ199": 893, - "ĠAr": 894, - "Ġgame": 895, - "Ġhim": 896, - "ook": 897, - "Ġup": 898, - "Ġabout": 899, - "Ġrel": 900, - "form": 901, - "Ġthree": 902, - "att": 903, - "ĠCom": 904, - "Ġsa": 905, - "ears": 906, - "Ġ5": 907, - "ry": 908, - "Ġimp": 909, - "Ġmost": 910, - "fer": 911, - "Ġpres": 912, - "Ġfil": 913, - "Ġbetween": 914, - "Ġbeg": 915, - "ph": 916, - "ors": 917, - "Ġthan": 918, - "Ġrecor": 919, - "ob": 920, - "eric": 921, - "ating": 922, - "Ġthroug": 923, - "king": 924, - "Ġout": 925, - "Ġnum": 926, - "ood": 927, - "ollow": 928, - "act": 929, - "uil": 930, - "Ġcre": 931, - "olog": 932, - "ational": 933, - "Ġproduc": 934, - "Ġwhile": 935, - "Ġlater": 936, - "Ġwrit": 937, - "ex": 938, - "Ġstar": 939, - "Ġspec": 940, - "ee": 941, - "ished": 942, - "Ġreg": 943, - "ision": 944, - "outh": 945, - "Ġrele": 946, - "Ġass": 947, - "Ġseason": 948, - "Ġmade": 949, - "ily": 950, - "ru": 951, - "oy": 952, - "tur": 953, - "te": 954, - "Ġqu": 955, - "Ġmov": 956, - "ury": 957, - "ĠAmeric": 958, - "ement": 959, - "cc": 960, - "ound": 961, - "Ġlar": 962, - "Ġform": 963, - "ect": 964, - "Ġdef": 965, - "Ġmus": 966, - "ĠPar": 967, - "Ġme": 968, - "Ġsub": 969, - "way": 970, - "op": 971, - "oh": 972, - "eld": 973, - "ie": 974, - "emp": 975, - "ames": 976, - "ern": 977, - "Ġnor": 978, - "ived": 979, - "evel": 980, - "Ġsuch": 981, - "ards": 982, - "Ġind": 983, - "ike": 984, - "Ġgen": 985, - "ert": 986, - "Ġyear": 987, - "Ġused": 988, - "Ġnew": 989, - "Ġ5": 990, - "Ġalb": 991, - "sp": 992, - "yp": 993, - "Ġwith": 994, - "Ġwhere": 995, - "ics": 996, - "ĠThis": 997, - "Ġthem": 998, - "wn": 999 }, - "merges": [ - "Ġ t", - "Ġt h", - "Ġ a", - "Ġth e", - "i n", - "Ġ o", - "Ġ ,", - "Ġ s", - "e d", - "Ġ w", - "e r", - "Ġ .", - "Ġ i", - "r e", - "Ġ c", - "n d", - "Ġ f", - "Ġ b", - "a t", - "Ġo f", - "e r", - "e n", - "a r", - "o r", - "i t", - "Ġ p", - "Ġ h", - "Ġa nd", - "o n", - "in g", - "a n", - "r o", - "Ġ m", - "Ġ d", - "e s", - "Ġi n", - "o n", - "Ġt o", - "o u", - "i s", - "Ġ a", - "i c", - "Ġ T", - "a l", - "Ġ l", - "Ġ =", - "Ġ re", - "Ġ \"", - "e s", - "Ġ S", - "a s", - "a l", - "i l", - "e l", - "i on", - "Ġ A", - "Ġ C", - "Ġ 1", - "Ġ Ċ", - "u r", - "ĠT h", - "Ġ n", - "a s", - "Ġ @", - "e c", - "o m", - "a c", - "Ġ e", - "Ġw as", - "Ġ M", - "o r", - "a n", - "a m", - "e n", - "o l", - "Ġ in", - "Ġ g", - "Ġ '", - "Ġ B", - "l y", - "a t", - "i v", - "t s", - "ĠTh e", - "u s", - "- @", - "Ġ@ -@", - "i s", - "Ġ I", - "Ġw h", - "i g", - "Ġ H", - "Ġs t", - "o s", - "u n", - "t h", - "Ġ P", - "Ġw it", - "Ġth at", - "i r", - "Ġa s", - "e m", - "Ġo n", - "r a", - "Ġf or", - "Ġ R", - "e t", - "o w", - "Ġ 2", - "i d", - "Ġ D", - "l e", - "Ġwit h", - "l a", - "en t", - "i m", - "Ġ F", - "e a", - "i on", - "Ġb y", - "Ġ )", - "Ġ (", - "Ġa l", - "Ġc on", - "en t", - "Ġ W", - "Ġi s", - "er e", - "Ġ G", - "Ġ N", - "Ġ L", - "Ġh a", - "er s", - "r i", - "t h", - "t ed", - "u c", - "Ġ J", - "Ġ1 9", - "e v", - "u l", - "Ġ v", - "c e", - "at ion", - "ro m", - "Ġb e", - "Ġ E", - "i n", - "Ġth e", - "Ġf rom", - "Ġ O", - "t er", - "Ġp ro", - "Ġa r", - "a d", - "Ġc om", - "i c", - "a g", - "Ġh is", - "Ġs h", - "Ġa t", - "o v", - "i es", - "o o", - "p p", - "s t", - "c h", - "Ġ r", - "Ġ2 0", - "a y", - "i f", - "Ġw ere", - "Ġc h", - "u t", - "s t", - "u t", - "d s", - "o p", - "u m", - "Ġi t", - "o c", - "t er", - "l e", - "ig h", - "u d", - "Ġe x", - "ion s", - "at e", - "it y", - "at ed", - "Ġ un", - "e p", - "q u", - "Ġn o", - "Ġ K", - "iv e", - "is t", - "Ġo n", - "am e", - "ou n", - "i r", - "a b", - "Ġ â", - "in g", - "Ġh e", - "l d", - "u g", - "ic h", - "Ġa n", - "e d", - "Ġ k", - "Ġâ Ģ", - "Ġha d", - "v e", - "a in", - "Ġs e", - "t ion", - "or e", - "re s", - "Ġwh ich", - "ĠI n", - "o d", - "th er", - "a k", - "Ġs p", - "a r", - "Ġ y", - "ĠC h", - "on g", - "Ġa c", - "es t", - "Ġ U", - "a p", - "f f", - "al ly", - "r it", - "ĠS t", - "u b", - "g e", - "b er", - "e t", - "Ġb e", - "e ar", - "Ġre c", - "er s", - "Ġf ir", - "o t", - "Ġar e", - "Ġa n", - "c h", - "o g", - "i a", - "es t", - "in e", - "il l", - "an d", - "e l", - "ar y", - "e w", - "i d", - "Ġf or", - "Ġ ;", - "Ġcom p", - "Ġ V", - "Ġin c", - "t r", - "Ġ20 0", - "Ġthe ir", - "u s", - "Ġb ut", - "r an", - "ic al", - "Ġfir st", - "Ġd e", - "Ġin t", - "Ġ ro", - "s o", - "ĠâĢ ĵ", - "Ġno t", - "d ing", - "f ter", - "ur e", - "Ġp ar", - "Ġ :", - "i an", - "Ġt w", - "ou ld", - "Ġal so", - "Ġi ts", - "Ġw or", - "u m", - "Ġo r", - "os t", - "0 0", - "ou r", - "ar d", - "Ġre s", - "m p", - "u e", - "Ġa b", - "is h", - "Ġcon t", - "Ġa d", - "ow n", - "al l", - "ou g", - "Ġh er", - "as t", - "Ġ en", - "om e", - "al l", - "d ed", - "o w", - "Ġha ve", - "Ġ us", - "ea r", - "ac k", - "d uc", - "i al", - "s s", - "en ts", - "a in", - "t ing", - "Ġon e", - "es s", - "Ġh as", - "igh t", - "a v", - "Ġe v", - "ou t", - "a y", - "en ce", - "Ġbe en", - "e w", - "Ġtw o", - "Ġc l", - "d er", - "im e", - "k s", - "es s", - "is h", - ". @", - "Ġ@ .@", - "Ġp la", - "Ġp l", - "Ġo r", - "u p", - "m ent", - "ur ing", - "ol l", - "ĠI n", - "Ġth is", - "Ġb ec", - "Ġcom m", - "Ġd is", - "at er", - "ag e", - "Ġa pp", - "ou s", - "e y", - "i l", - "p er", - "ĠA l", - "ion al", - "l ud", - "el y", - "t t", - "il e", - "i z", - "Ġ j", - "Ġwh o", - "Ġa g", - "i b", - "Ġthe y", - "f or", - "Ġo v", - "at h", - "e g", - "Ġs c", - "i p", - "Ġ20 1", - "Ġ 3", - "Ġp er", - "or y", - "Ġd es", - "id e", - "Ġs er", - "s e", - "ĠH e", - "la nd", - "at ions", - "r ic", - "i t", - "re s", - "er ed", - "Ġp re", - "ĠS h", - "an ce", - "or t", - "an t", - ", @", - "Ġ@ ,@", - "el l", - "Ġ Y", - "n ed", - "el l", - "it e", - "Ġinc lud", - "Ġre p", - "Ġa fter", - "Ġs uc", - "re e", - "an y", - "i m", - "or t", - "Ġ1 8", - "Ġs u", - "ad e", - "ou r", - "ĠU n", - "ĠI t", - "i k", - "ĠM ar", - "em ber", - "Ġ 1", - "e en", - "a nd", - "Ġs ec", - "ic e", - "Ġt ime", - "ĠA n", - "Ġint o", - "Ġf in", - "Ġo ther", - "Ġa tt", - "il l", - "re n", - "ac h", - "as s", - "er al", - "es e", - "s h", - "al s", - "it ion", - "oug h", - "l es", - "am p", - "Ġw ould", - "Ġm ore", - "ro ug", - "ri b", - "er y", - "ac e", - "Ġ A", - "Ġpla y", - "it ed", - "k ed", - "is t", - "i ed", - "Ġ 2", - "as ed", - "ing s", - "an g", - "a m", - "i p", - "Ġb o", - "ab le", - "t y", - "Ġch ar", - "Ġc ent", - "et w", - "at es", - "ro p", - "Ġ I", - "u nd", - "ĠA m", - "c es", - "o in", - "Ġin ter", - "u p", - "c t", - "on e", - "Ġt ra", - "an t", - "ec t", - "Ġal l", - "e f", - "Ġcon s", - "ub l", - "n ing", - "an s", - "Ġf e", - "us t", - "Ġ 0", - "Ġre m", - "as e", - "on g", - "Ġwh en", - "e b", - "ĠW h", - "Ġe ar", - "ev er", - "Ġov er", - "Ġk n", - "a us", - "Ġp os", - "a d", - "er m", - "Ġsh e", - "Ġ ra", - "Ġd uring", - "as on", - "v i", - "Ġex p", - "Ġl ea", - "Ġ el", - "Ġ 4", - "Ġon ly", - "o nd", - "Ġd ec", - "Ġac c", - "Ġo ff", - "is s", - "Ġf l", - "ĠE n", - "o t", - "en s", - "os e", - "ak e", - "o m", - "Ġs ev", - "ac h", - "etw een", - "er n", - "Ġ 3", - "Ġp r", - "Ġg ro", - "r uc", - "Ġd i", - "Ġ19 9", - "ĠA r", - "Ġg ame", - "Ġh im", - "oo k", - "Ġ up", - "Ġab out", - "Ġre l", - "for m", - "Ġth ree", - "at t", - "ĠC om", - "Ġs a", - "ear s", - "Ġ 5", - "r y", - "Ġi mp", - "Ġm ost", - "f er", - "Ġp res", - "Ġf il", - "Ġb etween", - "Ġbe g", - "p h", - "or s", - "Ġth an", - "Ġrec or", - "o b", - "er ic", - "at ing", - "Ġth roug", - "k ing", - "Ġo ut", - "Ġn um", - "oo d", - "oll ow", - "ac t", - "u il", - "Ġc re", - "ol og", - "at ional", - "Ġpro duc", - "Ġwh ile", - "Ġl ater", - "Ġw rit", - "e x", - "Ġst ar", - "Ġsp ec", - "e e", - "ish ed", - "Ġre g", - "is ion", - "ou th", - "Ġre le", - "Ġa ss", - "Ġse ason", - "Ġm ade", - "il y", - "r u", - "o y", - "t ur", - "t e", - "Ġ qu", - "Ġm ov", - "ur y", - "ĠAm eric", - "em ent", - "c c", - "ou nd", - "Ġl ar", - "Ġfor m", - "ec t", - "Ġde f", - "Ġm us", - "ĠP ar", - "Ġm e", - "Ġs ub", - "w ay", - "o p", - "o h", - "el d", - "i e", - "em p", - "am es", - "er n", - "Ġn or", - "iv ed", - "ev el", - "Ġsuc h", - "ar ds", - "Ġin d", - "ik e", - "Ġg en", - "er t", - "Ġy ear", - "Ġus ed", - "Ġn ew", - "Ġ 5", - "Ġal b", - "s p", - "y p", - "Ġwit h", - "Ġwh ere", - "ic s", - "ĠTh is", - "Ġthe m", - "w n" - ] - } -} \ No newline at end of file + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "<|endoftext|>", + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|startoftext|>": 0, + "<|endoftext|>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "|": 92 + }, + "merges": [] + } +} diff --git a/vocab.json b/vocab.json index afb3fdc..bd14b1d 100644 --- a/vocab.json +++ b/vocab.json @@ -1 +1,95 @@ -{"<|startoftext|>":0,"<|endoftext|>":1,"!":2,"\"":3,"#":4,"$":5,"%":6,"&":7,"'":8,"(":9,")":10,"*":11,"+":12,",":13,"-":14,".":15,"/":16,"0":17,"1":18,"2":19,"3":20,"4":21,"5":22,"6":23,"7":24,"8":25,"9":26,":":27,";":28,"<":29,"=":30,">":31,"?":32,"@":33,"A":34,"B":35,"C":36,"D":37,"E":38,"F":39,"G":40,"H":41,"I":42,"J":43,"K":44,"L":45,"M":46,"N":47,"O":48,"P":49,"Q":50,"R":51,"S":52,"T":53,"U":54,"V":55,"W":56,"X":57,"Y":58,"Z":59,"[":60,"\\":61,"]":62,"^":63,"_":64,"`":65,"a":66,"b":67,"c":68,"d":69,"e":70,"f":71,"g":72,"h":73,"i":74,"j":75,"k":76,"l":77,"m":78,"n":79,"o":80,"p":81,"q":82,"r":83,"s":84,"t":85,"u":86,"v":87,"w":88,"x":89,"y":90,"z":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"Â":125,"Ã":126,"Ä":127,"Å":128,"Æ":129,"Ç":130,"È":131,"É":132,"Ê":133,"Ë":134,"Ì":135,"Í":136,"Î":137,"Ï":138,"Ð":139,"Ñ":140,"Ö":141,"×":142,"Ø":143,"Ù":144,"Ü":145,"à":146,"á":147,"â":148,"ã":149,"ä":150,"å":151,"æ":152,"ç":153,"è":154,"é":155,"ë":156,"ì":157,"ï":158,"Ċ":159,"Ġ":160,"Ģ":161,"ģ":162,"Ĥ":163,"ĥ":164,"Ħ":165,"ħ":166,"Ĩ":167,"ĩ":168,"Ī":169,"ī":170,"Ĭ":171,"ĭ":172,"Į":173,"į":174,"İ":175,"ı":176,"IJ":177,"ij":178,"Ĵ":179,"ĵ":180,"Ķ":181,"ķ":182,"ĸ":183,"Ĺ":184,"ĺ":185,"Ļ":186,"ļ":187,"Ľ":188,"ľ":189,"Ŀ":190,"ŀ":191,"Ł":192,"ł":193,"Ń":194,"e":195,"d":196,"a":197,"o":198,"n":199,"±":200,"l":201,"m":202,"h":203,"r":204,"i":205,"s":206,"Z":207,"t":208,"f":209,"k":210,"y":211,"b":212,"F":213,"g":214,"7":215,"0":216,"p":217,"L":218,"H":219,"¡":220,"Ī":221,"1":222,"Ģ":223,"c":224,"ĩ":225,"6":226,"A":227,"z":228,"u":229,"S":230,"2":231,"v":232,"4":233,"M":234,"T":235,"8":236,"I":237,"N":238,"C":239,"5":240,"¹":241,"9":242,"3":243,"ī":244,"P":245,"E":246,"»":247,"V":248,"İ":249,"w":250,"J":251,"ł":252,".":253,"K":254,"D":255,"Ķ":256,"¸":257,"B":258,"©":259,"º":260,"µ":261,"Ĥ":262,"X":263,"R":264,"O":265,"«":266,"Ļ":267,"U":268,"x":269,"[":270,"¿":271,"³":272,"ģ":273,"W":274,"§":275,"-":276,"ĸ":277,"Ħ":278,",":279,"q":280,"ħ":281,"¨":282,"G":283,"²":284,"ĺ":285,"ª":286,"¯":287,"j":288,"]":289,"ļ":290,"Ŀ":291,"¤":292,"ŀ":293,"½":294,"IJ":295,"'":296,"Ń":297,"°":298,"ľ":299,">":300,"¶":301,"į":302,"¦":303,"|":304,"¼":305,"¢":306,"´":307,"Ĩ":308,"Q":309,"Y":310,"Ľ":311,"ĵ":312,"ij":313,"ķ":314,"Ĭ":315,"¾":316,";":317,"(":318,"¬":319,"@":320,"ĭ":321,"Ĺ":322,"£":323,"Į":324,"#":325,"·":326,"*":327,"Ĵ":328,"®":329,")":330,"^":331,"ı":332,"Ġ":333,"_":334,"Ł":335,"}":336,"ĥ":337,"\\":338,"¥":339,"<":340,"+":341,"=":342,"~":343,"\"":344,"!":345,"?":346,"`":347,"$":348,"Ċ":349,"/":350,"%":351,"&":352,":":353,"Ġt":354,"Ġth":355,"Ġa":356,"Ġthe":357,"in":358,"Ġo":359,"Ġ,":360,"Ġs":361,"ed":362,"Ġw":363,"er":364,"Ġ.":365,"Ġi":366,"re":367,"Ġc":368,"nd":369,"Ġf":370,"Ġb":371,"at":372,"Ġof":373,"er":374,"en":375,"ar":376,"or":377,"it":378,"Ġp":379,"Ġh":380,"Ġand":381,"on":382,"ing":383,"an":384,"ro":385,"Ġm":386,"Ġd":387,"es":388,"Ġin":389,"on":390,"Ġto":391,"ou":392,"is":393,"Ġa":394,"ic":395,"ĠT":396,"al":397,"Ġl":398,"Ġ=":399,"Ġre":400,"Ġ\"":401,"es":402,"ĠS":403,"as":404,"al":405,"il":406,"el":407,"ion":408,"ĠA":409,"ĠC":410,"Ġ1":411,"ĠĊ":412,"ur":413,"ĠTh":414,"Ġn":415,"as":416,"Ġ@":417,"ec":418,"om":419,"ac":420,"Ġe":421,"Ġwas":422,"ĠM":423,"or":424,"an":425,"am":426,"en":427,"ol":428,"Ġin":429,"Ġg":430,"Ġ'":431,"ĠB":432,"ly":433,"at":434,"iv":435,"ts":436,"ĠThe":437,"us":438,"-@":439,"Ġ@-@":440,"is":441,"ĠI":442,"Ġwh":443,"ig":444,"ĠH":445,"Ġst":446,"os":447,"un":448,"th":449,"ĠP":450,"Ġwit":451,"Ġthat":452,"ir":453,"Ġas":454,"em":455,"Ġon":456,"ra":457,"Ġfor":458,"ĠR":459,"et":460,"ow":461,"Ġ2":462,"id":463,"ĠD":464,"le":465,"Ġwith":466,"la":467,"ent":468,"im":469,"ĠF":470,"ea":471,"ion":472,"Ġby":473,"Ġ)":474,"Ġ(":475,"Ġal":476,"Ġcon":477,"ent":478,"ĠW":479,"Ġis":480,"ere":481,"ĠG":482,"ĠN":483,"ĠL":484,"Ġha":485,"ers":486,"ri":487,"th":488,"ted":489,"uc":490,"ĠJ":491,"Ġ19":492,"ev":493,"ul":494,"Ġv":495,"ce":496,"ation":497,"rom":498,"Ġbe":499,"ĠE":500,"in":501,"Ġthe":502,"Ġfrom":503,"ĠO":504,"ter":505,"Ġpro":506,"Ġar":507,"ad":508,"Ġcom":509,"ic":510,"ag":511,"Ġhis":512,"Ġsh":513,"Ġat":514,"ov":515,"ies":516,"oo":517,"pp":518,"st":519,"ch":520,"Ġr":521,"Ġ20":522,"ay":523,"if":524,"Ġwere":525,"Ġch":526,"ut":527,"st":528,"ut":529,"ds":530,"op":531,"um":532,"Ġit":533,"oc":534,"ter":535,"le":536,"igh":537,"ud":538,"Ġex":539,"ions":540,"ate":541,"ity":542,"ated":543,"Ġun":544,"ep":545,"qu":546,"Ġno":547,"ĠK":548,"ive":549,"ist":550,"Ġon":551,"ame":552,"oun":553,"ir":554,"ab":555,"Ġâ":556,"ing":557,"Ġhe":558,"ld":559,"ug":560,"ich":561,"Ġan":562,"ed":563,"Ġk":564,"ĠâĢ":565,"Ġhad":566,"ve":567,"ain":568,"Ġse":569,"tion":570,"ore":571,"res":572,"Ġwhich":573,"ĠIn":574,"od":575,"ther":576,"ak":577,"Ġsp":578,"ar":579,"Ġy":580,"ĠCh":581,"ong":582,"Ġac":583,"est":584,"ĠU":585,"ap":586,"ff":587,"ally":588,"rit":589,"ĠSt":590,"ub":591,"ge":592,"ber":593,"et":594,"Ġbe":595,"ear":596,"Ġrec":597,"ers":598,"Ġfir":599,"ot":600,"Ġare":601,"Ġan":602,"ch":603,"og":604,"ia":605,"est":606,"ine":607,"ill":608,"and":609,"el":610,"ary":611,"ew":612,"id":613,"Ġfor":614,"Ġ;":615,"Ġcomp":616,"ĠV":617,"Ġinc":618,"tr":619,"Ġ200":620,"Ġtheir":621,"us":622,"Ġbut":623,"ran":624,"ical":625,"Ġfirst":626,"Ġde":627,"Ġint":628,"Ġro":629,"so":630,"ĠâĢĵ":631,"Ġnot":632,"ding":633,"fter":634,"ure":635,"Ġpar":636,"Ġ:":637,"ian":638,"Ġtw":639,"ould":640,"Ġalso":641,"Ġits":642,"Ġwor":643,"um":644,"Ġor":645,"ost":646,"00":647,"our":648,"ard":649,"Ġres":650,"mp":651,"ue":652,"Ġab":653,"ish":654,"Ġcont":655,"Ġad":656,"own":657,"all":658,"oug":659,"Ġher":660,"ast":661,"Ġen":662,"ome":663,"all":664,"ded":665,"ow":666,"Ġhave":667,"Ġus":668,"ear":669,"ack":670,"duc":671,"ial":672,"ss":673,"ents":674,"ain":675,"ting":676,"Ġone":677,"ess":678,"Ġhas":679,"ight":680,"av":681,"Ġev":682,"out":683,"ay":684,"ence":685,"Ġbeen":686,"ew":687,"Ġtwo":688,"Ġcl":689,"der":690,"ime":691,"ks":692,"ess":693,"ish":694,".@":695,"Ġ@.@":696,"Ġpla":697,"Ġpl":698,"Ġor":699,"up":700,"ment":701,"uring":702,"oll":703,"ĠIn":704,"Ġthis":705,"Ġbec":706,"Ġcomm":707,"Ġdis":708,"ater":709,"age":710,"Ġapp":711,"ous":712,"ey":713,"il":714,"per":715,"ĠAl":716,"ional":717,"lud":718,"ely":719,"tt":720,"ile":721,"iz":722,"Ġj":723,"Ġwho":724,"Ġag":725,"ib":726,"Ġthey":727,"for":728,"Ġov":729,"ath":730,"eg":731,"Ġsc":732,"ip":733,"Ġ201":734,"Ġ3":735,"Ġper":736,"ory":737,"Ġdes":738,"ide":739,"Ġser":740,"se":741,"ĠHe":742,"land":743,"ations":744,"ric":745,"it":746,"res":747,"ered":748,"Ġpre":749,"ĠSh":750,"ance":751,"ort":752,"ant":753,",@":754,"Ġ@,@":755,"ell":756,"ĠY":757,"ned":758,"ell":759,"ite":760,"Ġinclud":761,"Ġrep":762,"Ġafter":763,"Ġsuc":764,"ree":765,"any":766,"im":767,"ort":768,"Ġ18":769,"Ġsu":770,"ade":771,"our":772,"ĠUn":773,"ĠIt":774,"ik":775,"ĠMar":776,"ember":777,"Ġ1":778,"een":779,"and":780,"Ġsec":781,"ice":782,"Ġtime":783,"ĠAn":784,"Ġinto":785,"Ġfin":786,"Ġother":787,"Ġatt":788,"ill":789,"ren":790,"ach":791,"ass":792,"eral":793,"ese":794,"sh":795,"als":796,"ition":797,"ough":798,"les":799,"amp":800,"Ġwould":801,"Ġmore":802,"roug":803,"rib":804,"ery":805,"ace":806,"ĠA":807,"Ġplay":808,"ited":809,"ked":810,"ist":811,"ied":812,"Ġ2":813,"ased":814,"ings":815,"ang":816,"am":817,"ip":818,"Ġbo":819,"able":820,"ty":821,"Ġchar":822,"Ġcent":823,"etw":824,"ates":825,"rop":826,"ĠI":827,"und":828,"ĠAm":829,"ces":830,"oin":831,"Ġinter":832,"up":833,"ct":834,"one":835,"Ġtra":836,"ant":837,"ect":838,"Ġall":839,"ef":840,"Ġcons":841,"ubl":842,"ning":843,"ans":844,"Ġfe":845,"ust":846,"Ġ0":847,"Ġrem":848,"ase":849,"ong":850,"Ġwhen":851,"eb":852,"ĠWh":853,"Ġear":854,"ever":855,"Ġover":856,"Ġkn":857,"aus":858,"Ġpos":859,"ad":860,"erm":861,"Ġshe":862,"Ġra":863,"Ġduring":864,"ason":865,"vi":866,"Ġexp":867,"Ġlea":868,"Ġel":869,"Ġ4":870,"Ġonly":871,"ond":872,"Ġdec":873,"Ġacc":874,"Ġoff":875,"iss":876,"Ġfl":877,"ĠEn":878,"ot":879,"ens":880,"ose":881,"ake":882,"om":883,"Ġsev":884,"ach":885,"etween":886,"ern":887,"Ġ3":888,"Ġpr":889,"Ġgro":890,"ruc":891,"Ġdi":892,"Ġ199":893,"ĠAr":894,"Ġgame":895,"Ġhim":896,"ook":897,"Ġup":898,"Ġabout":899,"Ġrel":900,"form":901,"Ġthree":902,"att":903,"ĠCom":904,"Ġsa":905,"ears":906,"Ġ5":907,"ry":908,"Ġimp":909,"Ġmost":910,"fer":911,"Ġpres":912,"Ġfil":913,"Ġbetween":914,"Ġbeg":915,"ph":916,"ors":917,"Ġthan":918,"Ġrecor":919,"ob":920,"eric":921,"ating":922,"Ġthroug":923,"king":924,"Ġout":925,"Ġnum":926,"ood":927,"ollow":928,"act":929,"uil":930,"Ġcre":931,"olog":932,"ational":933,"Ġproduc":934,"Ġwhile":935,"Ġlater":936,"Ġwrit":937,"ex":938,"Ġstar":939,"Ġspec":940,"ee":941,"ished":942,"Ġreg":943,"ision":944,"outh":945,"Ġrele":946,"Ġass":947,"Ġseason":948,"Ġmade":949,"ily":950,"ru":951,"oy":952,"tur":953,"te":954,"Ġqu":955,"Ġmov":956,"ury":957,"ĠAmeric":958,"ement":959,"cc":960,"ound":961,"Ġlar":962,"Ġform":963,"ect":964,"Ġdef":965,"Ġmus":966,"ĠPar":967,"Ġme":968,"Ġsub":969,"way":970,"op":971,"oh":972,"eld":973,"ie":974,"emp":975,"ames":976,"ern":977,"Ġnor":978,"ived":979,"evel":980,"Ġsuch":981,"ards":982,"Ġind":983,"ike":984,"Ġgen":985,"ert":986,"Ġyear":987,"Ġused":988,"Ġnew":989,"Ġ5":990,"Ġalb":991,"sp":992,"yp":993,"Ġwith":994,"Ġwhere":995,"ics":996,"ĠThis":997,"Ġthem":998,"wn":999} \ No newline at end of file +{ + "<|startoftext|>": 0, + "<|endoftext|>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "|": 92 +}