Keeping the vocab to 99.

Update the tokenizer for 4.17.
Adding tf weights.
2022-02-23 11:44:13 +01:00 · 2022-02-23 11:20:43 +01:00 · 2022-02-17 14:50:44 +01:00 · 2022-02-16 18:17:38 +01:00 · 2022-02-16 18:16:09 +01:00 · 2022-02-16 18:15:43 +01:00
10 changed files with 597 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
+---
+pipeline_tag: zero-shot-image-classification
+---
--- a/config.json
+++ b/config.json
@@ -0,0 +1,302 @@
+{
+  "_name_or_path": "../tiny-random-clip-zero-shot-image-classification/",
+  "architectures": [
+    "CLIPModel"
+  ],
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 64,
+  "text_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.1,
+    "bad_words_ids": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 32,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 37,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 512,
+    "min_length": 0,
+    "model_type": "clip_text_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 4,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 5,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.17.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vocab_size": 99
+  },
+  "text_config_dict": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.1,
+    "bad_words_ids": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 32,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 37,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 512,
+    "min_length": 0,
+    "model_type": "clip_text_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 4,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 5,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.11.0.dev0",
+    "use_bfloat16": false,
+    "vocab_size": 99
+  },
+  "transformers_version": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.1,
+    "bad_words_ids": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 32,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 30,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 37,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 4,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 5,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 2,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.17.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "vision_config_dict": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.1,
+    "bad_words_ids": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 32,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 30,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 37,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 4,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 5,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 2,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.11.0.dev0",
+    "use_bfloat16": false
+  },
+  "vocab_size": 1000
+}
--- a/merges.txt
+++ b/merges.txt
@@ -0,0 +1 @@
+#version: 0.2 - Trained by `huggingface/tokenizers`
--- a/preprocessor_config.json
+++ b/preprocessor_config.json
@@ -0,0 +1,11 @@
+{
+    "crop_size": 30,
+    "do_center_crop": true,
+    "do_normalize": true,
+    "do_resize": true,
+    "feature_extractor_type": "CLIPFeatureExtractor",
+    "image_mean": [0.48145466, 0.4578275, 0.40821073],
+    "image_std": [0.26862954, 0.26130258, 0.27577711],
+    "resample": 3,
+    "size": 30
+}
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1 @@
+{"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}
--- a/tf_model.h5
+++ b/tf_model.h5
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,177 @@
+{
+    "version": "1.0",
+    "truncation": null,
+    "padding": null,
+    "added_tokens": [
+        {
+            "id": 0,
+            "special": true,
+            "content": "<|startoftext|>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": true
+        },
+        {
+            "id": 1,
+            "special": true,
+            "content": "<|endoftext|>",
+            "single_word": false,
+            "lstrip": false,
+            "rstrip": false,
+            "normalized": false
+        }
+    ],
+    "normalizer": {
+        "type": "Sequence",
+        "normalizers": [
+            {
+                "type": "NFC"
+            },
+            {
+                "type": "Replace",
+                "pattern": {
+                    "Regex": "\\s+"
+                },
+                "content": " "
+            },
+            {
+                "type": "Lowercase"
+            }
+        ]
+    },
+    "pre_tokenizer": {
+        "type": "Sequence",
+        "pretokenizers": [
+            {
+                "type": "Split",
+                "pattern": {
+                    "Regex": "'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
+                },
+                "behavior": "Removed",
+                "invert": true
+            },
+            {
+                "type": "ByteLevel",
+                "add_prefix_space": false,
+                "trim_offsets": true
+            }
+        ]
+    },
+    "post_processor": {
+        "type": "RobertaProcessing",
+        "sep": ["<|endoftext|>", 1],
+        "cls": ["<|startoftext|>", 0],
+        "trim_offsets": false,
+        "add_prefix_space": false
+    },
+    "decoder": {
+        "type": "ByteLevel",
+        "add_prefix_space": true,
+        "trim_offsets": true
+    },
+    "model": {
+        "type": "BPE",
+        "dropout": null,
+        "unk_token": "<|endoftext|>",
+        "continuing_subword_prefix": "",
+        "end_of_word_suffix": "</w>",
+        "fuse_unk": false,
+        "vocab": {
+            "<|startoftext|>": 0,
+            "<|endoftext|>": 1,
+            "!": 2,
+            "\"": 3,
+            "#": 4,
+            "$": 5,
+            "%": 6,
+            "&": 7,
+            "'": 8,
+            "(": 9,
+            ")": 10,
+            "*": 11,
+            "+": 12,
+            ",": 13,
+            "-": 14,
+            ".": 15,
+            "/": 16,
+            "0": 17,
+            "1": 18,
+            "2": 19,
+            "3": 20,
+            "4": 21,
+            "5": 22,
+            "6": 23,
+            "7": 24,
+            "8": 25,
+            "9": 26,
+            ":": 27,
+            ";": 28,
+            "<": 29,
+            "=": 30,
+            ">": 31,
+            "?": 32,
+            "@": 33,
+            "A": 34,
+            "B": 35,
+            "C": 36,
+            "D": 37,
+            "E": 38,
+            "F": 39,
+            "G": 40,
+            "H": 41,
+            "I": 42,
+            "J": 43,
+            "K": 44,
+            "L": 45,
+            "M": 46,
+            "N": 47,
+            "O": 48,
+            "P": 49,
+            "Q": 50,
+            "R": 51,
+            "S": 52,
+            "T": 53,
+            "U": 54,
+            "V": 55,
+            "W": 56,
+            "X": 57,
+            "Y": 58,
+            "Z": 59,
+            "[": 60,
+            "\\": 61,
+            "]": 62,
+            "^": 63,
+            "_": 64,
+            "`": 65,
+            "a": 66,
+            "b": 67,
+            "c": 68,
+            "d": 69,
+            "e": 70,
+            "f": 71,
+            "g": 72,
+            "h": 73,
+            "i": 74,
+            "j": 75,
+            "k": 76,
+            "l": 77,
+            "m": 78,
+            "n": 79,
+            "o": 80,
+            "p": 81,
+            "q": 82,
+            "r": 83,
+            "s": 84,
+            "t": 85,
+            "u": 86,
+            "v": 87,
+            "w": 88,
+            "x": 89,
+            "y": 90,
+            "z": 91,
+            "|": 92
+        },
+        "merges": []
+    }
+}
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1 @@
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": "<|endoftext|>", "add_prefix_space": false, "errors": "replace", "do_lower_case": true, "name_or_path": "hf-internal-testing/tiny-random-clip", "model_max_length": 77, "special_tokens_map_file": "/home/lysandre/.cache/huggingface/transformers/18a566598f286c9139f88160c99f84eec492a26bd22738fa9cb44d5b7e0a5c76.cce1206abbad28826f000510f22f354e53e66a97f7c23745a7dfe27609cc07f5", "from_slow": true, "tokenizer_class": "CLIPTokenizer"}
--- a/vocab.json
+++ b/vocab.json
@@ -0,0 +1,95 @@
+{
+    "<|startoftext|>": 0,
+    "<|endoftext|>": 1,
+    "!": 2,
+    "\"": 3,
+    "#": 4,
+    "$": 5,
+    "%": 6,
+    "&": 7,
+    "'": 8,
+    "(": 9,
+    ")": 10,
+    "*": 11,
+    "+": 12,
+    ",": 13,
+    "-": 14,
+    ".": 15,
+    "/": 16,
+    "0": 17,
+    "1": 18,
+    "2": 19,
+    "3": 20,
+    "4": 21,
+    "5": 22,
+    "6": 23,
+    "7": 24,
+    "8": 25,
+    "9": 26,
+    ":": 27,
+    ";": 28,
+    "<": 29,
+    "=": 30,
+    ">": 31,
+    "?": 32,
+    "@": 33,
+    "A": 34,
+    "B": 35,
+    "C": 36,
+    "D": 37,
+    "E": 38,
+    "F": 39,
+    "G": 40,
+    "H": 41,
+    "I": 42,
+    "J": 43,
+    "K": 44,
+    "L": 45,
+    "M": 46,
+    "N": 47,
+    "O": 48,
+    "P": 49,
+    "Q": 50,
+    "R": 51,
+    "S": 52,
+    "T": 53,
+    "U": 54,
+    "V": 55,
+    "W": 56,
+    "X": 57,
+    "Y": 58,
+    "Z": 59,
+    "[": 60,
+    "\\": 61,
+    "]": 62,
+    "^": 63,
+    "_": 64,
+    "`": 65,
+    "a": 66,
+    "b": 67,
+    "c": 68,
+    "d": 69,
+    "e": 70,
+    "f": 71,
+    "g": 72,
+    "h": 73,
+    "i": 74,
+    "j": 75,
+    "k": 76,
+    "l": 77,
+    "m": 78,
+    "n": 79,
+    "o": 80,
+    "p": 81,
+    "q": 82,
+    "r": 83,
+    "s": 84,
+    "t": 85,
+    "u": 86,
+    "v": 87,
+    "w": 88,
+    "x": 89,
+    "y": 90,
+    "z": 91,
+    "|": 92
+}
Author	SHA1	Message	Date
Nicolas Patry	b965c5deee	Keeping the vocab to 99.	2022-02-23 11:44:13 +01:00
Nicolas Patry	af496b2775	Update the tokenizer for 4.17.	2022-02-23 11:20:43 +01:00
Nicolas Patry	8cdd434766	Adding tf weights.	2022-02-17 14:50:44 +01:00
Nicolas Patry	76a74407e7	Removing all merges.	2022-02-16 18:17:38 +01:00
Nicolas Patry	12ceb59e69	Going the other way and simplifying the tokenizer instead.	2022-02-16 18:16:09 +01:00
Nicolas Patry	541d169ca9	Revert "Making the vocab_size match the tokenizer vocab_size." This reverts commit `4be554b688`.	2022-02-16 18:15:43 +01:00
Nicolas Patry	4be554b688	Making the vocab_size match the tokenizer vocab_size.	2022-02-16 18:14:27 +01:00
Nicolas Patry	10da33b5d6	crop_size is after resize ??	2022-02-16 18:09:42 +01:00
Nicolas Patry	8de11cad3b	Adding pipeline_tag.	2022-02-16 18:05:41 +01:00
Nicolas Patry	d05fe019ab	Making processor + model match.	2022-02-16 18:04:29 +01:00
				`@ -0,0 +1 @@`
				#version: 0.2 - Trained by `huggingface/tokenizers`
				`@ -0,0 +1 @@`
				`{"bos_token": {"content": "<\|startoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<\|endoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<\|endoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<\|endoftext\|>"}`
				`@ -0,0 +1 @@`
				{"unk_token": {"content": "<\|endoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<\|startoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<\|endoftext\|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": "<\|endoftext\|>", "add_prefix_space": false, "errors": "replace", "do_lower_case": true, "name_or_path": "hf-internal-testing/tiny-random-clip", "model_max_length": 77, "special_tokens_map_file": "/home/lysandre/.cache/huggingface/transformers/18a566598f286c9139f88160c99f84eec492a26bd22738fa9cb44d5b7e0a5c76.cce1206abbad28826f000510f22f354e53e66a97f7c23745a7dfe27609cc07f5", "from_slow": true, "tokenizer_class": "CLIPTokenizer"}