add tokenizer

This commit is contained in:
patil-suraj 2021-09-08 07:46:10 +02:00
parent 0bcfab3d3a
commit 33af1decb2
6 changed files with 48917 additions and 1 deletions

View File

@@ -1,5 +1,4 @@
{
"_name_or_path": "hf_model/clip-vit-base-patch16/",
"architectures": [
"CLIPModel"
],

48895
merges.txt Normal file

File diff suppressed because it is too large Load Diff

19
preprocessor_config.json Normal file
View File

@@ -0,0 +1,19 @@
{
"crop_size": 224,
"do_center_crop": true,
"do_normalize": true,
"do_resize": true,
"feature_extractor_type": "CLIPFeatureExtractor",
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"resample": 3,
"size": 224
}

1
special_tokens_map.json Normal file
View File

@@ -0,0 +1 @@
{"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}

1
tokenizer_config.json Normal file
View File

@@ -0,0 +1 @@
{"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": "<|endoftext|>", "add_prefix_space": false, "do_lower_case": true, "name_or_path": "openai/clip-vit-base-patch32", "model_max_length": 77, "tokenizer_class": "CLIPTokenizer"}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long