Compare commits: 3e1c92ca77 ... a0e5677e86 (10 commits)

Commits:
- a0e5677e86
- 51568a6e0a
- 5e755b1c9d
- 6f231487a5
- 88f8889ee0
- 0b8087bb43
- b41a392439
- 1172dffaf8
- df3bd66031
- 9b4ecbcecd
.gitattributes

@@ -15,3 +15,4 @@
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+rust_model.ot filter=lfs diff=lfs merge=lfs -text
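The added line routes `rust_model.ot` through Git LFS, so the weight file is stored as an LFS object rather than directly in the Git history. Once committed, it can be fetched like any other repository file; a minimal sketch, assuming this is the `EleutherAI/gpt-neo-2.7B` repository (the repo id is not shown in this diff) and that `huggingface_hub` is installed:

```python
# Minimal sketch: download the newly LFS-tracked Rust weights.
# Assumption: the repository id is EleutherAI/gpt-neo-2.7B (not stated in the diff itself).
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="EleutherAI/gpt-neo-2.7B",  # assumed repo id
    filename="rust_model.ot",           # the file added to LFS tracking above
)
print(local_path)
```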
README.md (26 changed lines)
@@ -4,11 +4,10 @@ language:
 tags:
 - text generation
 - pytorch
-- the Pile
 - causal-lm
-license: apache-2.0
+license: mit
 datasets:
-- the Pile
+- the_pile
 ---

 # GPT-Neo 2.7B

@@ -23,7 +22,7 @@ GPT-Neo 2.7B was trained on the Pile, a large scale curated dataset created by E

 ## Training procedure

-This model was trained for 400,000 steps on the Pile. It was trained as a masked autoregressive language model, using cross-entropy loss.
+This model was trained for 420 billion tokens over 400,000 steps. It was trained as a masked autoregressive language model, using cross-entropy loss.

 ## Intended Use and Limitations

@@ -77,7 +76,26 @@ TBD

 ### BibTeX entry and citation info

+To cite this model, use
 ```bibtex
+@software{gpt-neo,
+  author       = {Black, Sid and
+                  Leo, Gao and
+                  Wang, Phil and
+                  Leahy, Connor and
+                  Biderman, Stella},
+  title        = {{GPT-Neo: Large Scale Autoregressive Language
+                   Modeling with Mesh-Tensorflow}},
+  month        = mar,
+  year         = 2021,
+  note         = {{If you use this software, please cite it using
+                   these metadata.}},
+  publisher    = {Zenodo},
+  version      = {1.0},
+  doi          = {10.5281/zenodo.5297715},
+  url          = {https://doi.org/10.5281/zenodo.5297715}
+}
+
 @article{gao2020pile,
   title={The Pile: An 800GB Dataset of Diverse Text for Language Modeling},
   author={Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and others},
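The training-procedure hunk above restates the training budget as 420 billion tokens over 400,000 steps. For scale, that works out to roughly 1.05 million tokens per optimizer step, or on the order of 500 sequences per step if one assumes GPT-Neo's 2048-token context length; a quick back-of-the-envelope check:

```python
# Back-of-the-envelope arithmetic from the figures in the README diff.
tokens = 420e9          # 420 billion tokens (from the updated README line)
steps = 400_000         # training steps (from the README)
tokens_per_step = tokens / steps             # 1,050,000 tokens per step
sequences_per_step = tokens_per_step / 2048  # ~513, assuming 2048-token sequences
print(int(tokens_per_step), round(sequences_per_step))
```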
config.json (16 changed lines)
@@ -65,16 +65,16 @@
   "summary_proj_to_labels": true,
   "summary_type": "cls_index",
   "summary_use_proj": true,
-  "transformers_version": "4.5.0.dev0",
-  "use_cache": true,
-  "vocab_size": 50257,
-  "window_size": 256,
-  "tokenizer_class": "GPT2Tokenizer",
   "task_specific_params": {
     "text-generation": {
       "do_sample": true,
-      "temperature": 0.9,
-      "max_length": 50
+      "max_length": 50,
+      "temperature": 0.9
     }
-  }
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "transformers_version": "4.9.0.dev0",
+  "use_cache": true,
+  "vocab_size": 50257,
+  "window_size": 256
 }
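This change mostly alphabetizes the top-level keys and bumps `transformers_version` from 4.5.0.dev0 to 4.9.0.dev0; the `text-generation` defaults under `task_specific_params` (`do_sample`, `max_length`, `temperature`) keep the same values. A minimal sketch of how those defaults map onto generation settings, assuming this is the `EleutherAI/gpt-neo-2.7B` checkpoint and a recent `transformers` release:

```python
# Minimal sketch (assumed checkpoint name; values mirror task_specific_params above).
from transformers import pipeline

generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")

# Passing the config defaults explicitly keeps the behavior obvious:
# do_sample=True, max_length=50, temperature=0.9.
print(generator("EleutherAI has", do_sample=True, max_length=50, temperature=0.9))
```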
Binary file not shown.
Binary file not shown.