diff --git a/config.json b/config.json
index 3b86bc6..05e8cf2 100644
--- a/config.json
+++ b/config.json
@@ -1,40 +1,36 @@
 {
-    "activation_function": "gelu_new",
-    "architectures": [
-        "GPTJForCausalLM"
-    ],
-    "attention_dropout": 0,
-    "bos_token_id": 50256,
-    "embed_dropout": 0,
-    "eos_token_id": 50256,
-    "jax": true,
-    "rotary": true,
-    "rotary_dim": 64,
-    "gradient_checkpointing": false,
-    "n_embd": 4096,
-    "initializer_range": 0.02,
-    "intermediate_size": null,
-    "layer_norm_epsilon": 1e-05,
-    "n_positions": 2048,
-    "model_type": "gptj",
-    "n_head": 16,
-    "n_layer": 28,
-    "rotary_dim": 64,
-    "summary_activation": null,
-    "summary_first_dropout": 0.1,
-    "summary_proj_to_labels": true,
-    "summary_type": "cls_index",
-    "summary_use_proj": true,
-    "transformers_version": "4.10.0.dev0",
-    "use_cache": true,
-    "vocab_size": 50400,
-    "window_size": 256,
-    "tokenizer_class": "GPT2Tokenizer",
-    "task_specific_params": {
-        "text-generation": {
-            "do_sample": true,
-            "temperature": 1.0,
-            "max_length": 50
-        }
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTJForCausalLM"
+  ],
+  "attn_pdrop": 0.0,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gptj",
+  "n_embd": 4096,
+  "n_head": 16,
+  "n_layer": 28,
+  "n_positions": 2048,
+  "rotary": true,
+  "rotary_dim": 64,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "transformers_version": "4.10.0.dev0",
+  "tokenizer_class": "GPT2Tokenizer",
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "temperature": 1.0,
+      "max_length": 50
     }
-}
\ No newline at end of file
+  },
+  "use_cache": true,
+  "vocab_size": 50400
+}
\ No newline at end of file
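
A minimal sketch of sanity-checking the updated file, assuming a transformers release that ships GPTJConfig and that this config.json is in the working directory (not part of the patch itself):

    from transformers import GPTJConfig

    # Parse the updated config.json and confirm the renamed dropout keys
    # (attention_dropout/embed_dropout -> attn_pdrop/embd_pdrop) are picked up.
    config = GPTJConfig.from_json_file("config.json")
    print(config.attn_pdrop, config.embd_pdrop)                             # 0.0 0.0
    print(config.n_embd, config.n_head, config.n_layer, config.rotary_dim)  # 4096 16 28 64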