Upload main model (test)
This may or may not work, because the ColossalAI details have not yet been purged from the model. If it doesn't work, we'll update the file.
This commit is contained in:
parent
4c489e14d9
commit
0afbe0ce28
10
README.md
10
README.md
|
@@ -1,3 +1,7 @@
|
||||||
---
|
# pygmalion-350m
|
||||||
license: other
|
|
||||||
---
|
# Model description
|
||||||
|
|
||||||
|
This is a proof-of-concept fine-tune of Facebook's OPT-350M model optimized for dialogue, to be used as a stepping stone to higher-parameter models.
|
||||||
|
|
||||||
|
Disclaimer: NSFW data was included in the fine-tuning of this model. Although SFW inputs will usually result in SFW outputs, you are advised to **chat at your own risk. This model is not suitable for use by minors.**
|
|
@@ -0,0 +1,28 @@
|
||||||
|
{
|
||||||
|
"_name_or_path": "opt-350m",
|
||||||
|
"activation_dropout": 0.0,
|
||||||
|
"activation_function": "relu",
|
||||||
|
"architectures": [
|
||||||
|
"OPTForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 2,
|
||||||
|
"do_layer_norm_before": false,
|
||||||
|
"dropout": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"ffn_dim": 4096,
|
||||||
|
"hidden_size": 1024,
|
||||||
|
"init_std": 0.02,
|
||||||
|
"layerdrop": 0.0,
|
||||||
|
"max_position_embeddings": 2048,
|
||||||
|
"model_type": "opt",
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_hidden_layers": 24,
|
||||||
|
"pad_token_id": 1,
|
||||||
|
"prefix": "</s>",
|
||||||
|
"torch_dtype": "float16",
|
||||||
|
"transformers_version": "4.20.0.dev0",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 50272,
|
||||||
|
"word_embed_proj_dim": 512
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@@ -0,0 +1,3 @@
|
||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b0d55524d550fb87ac9b9a9509c97016d5ad20b4e8a87e8ac355f07c9bd21048
|
||||||
|
size 1324913007
|
|
@@ -0,0 +1 @@
|
||||||
|
{"bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
|
|
@@ -0,0 +1 @@
|
||||||
|
{"errors": "replace", "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "patrickvonplaten/opt-30b"}
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue