Compare commits
No commits in common. "a0e5677e8611d87c8ce501dfa6a713e42ba2f6ef" and "3e1c92ca77a1f0f143861a854d417b16174fa9a4" have entirely different histories.
a0e5677e86
...
3e1c92ca77
|
@ -15,4 +15,3 @@
|
||||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
rust_model.ot filter=lfs diff=lfs merge=lfs -text
|
|
||||||
|
|
26
README.md
26
README.md
|
@ -4,10 +4,11 @@ language:
|
||||||
tags:
|
tags:
|
||||||
- text generation
|
- text generation
|
||||||
- pytorch
|
- pytorch
|
||||||
|
- the Pile
|
||||||
- causal-lm
|
- causal-lm
|
||||||
license: mit
|
license: apache-2.0
|
||||||
datasets:
|
datasets:
|
||||||
- the_pile
|
- the Pile
|
||||||
---
|
---
|
||||||
|
|
||||||
# GPT-Neo 2.7B
|
# GPT-Neo 2.7B
|
||||||
|
@ -22,7 +23,7 @@ GPT-Neo 2.7B was trained on the Pile, a large scale curated dataset created by E
|
||||||
|
|
||||||
## Training procedure
|
## Training procedure
|
||||||
|
|
||||||
This model was trained for 420 billion tokens over 400,000 steps. It was trained as a masked autoregressive language model, using cross-entropy loss.
|
This model was trained for 400,000 steps on the Pile. It was trained as a masked autoregressive language model, using cross-entropy loss.
|
||||||
|
|
||||||
## Intended Use and Limitations
|
## Intended Use and Limitations
|
||||||
|
|
||||||
|
@ -76,26 +77,7 @@ TBD
|
||||||
|
|
||||||
### BibTeX entry and citation info
|
### BibTeX entry and citation info
|
||||||
|
|
||||||
To cite this model, use
|
|
||||||
```bibtex
|
```bibtex
|
||||||
@software{gpt-neo,
|
|
||||||
author = {Black, Sid and
|
|
||||||
Leo, Gao and
|
|
||||||
Wang, Phil and
|
|
||||||
Leahy, Connor and
|
|
||||||
Biderman, Stella},
|
|
||||||
title = {{GPT-Neo: Large Scale Autoregressive Language
|
|
||||||
Modeling with Mesh-Tensorflow}},
|
|
||||||
month = mar,
|
|
||||||
year = 2021,
|
|
||||||
note = {{If you use this software, please cite it using
|
|
||||||
these metadata.}},
|
|
||||||
publisher = {Zenodo},
|
|
||||||
version = {1.0},
|
|
||||||
doi = {10.5281/zenodo.5297715},
|
|
||||||
url = {https://doi.org/10.5281/zenodo.5297715}
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{gao2020pile,
|
@article{gao2020pile,
|
||||||
title={The Pile: An 800GB Dataset of Diverse Text for Language Modeling},
|
title={The Pile: An 800GB Dataset of Diverse Text for Language Modeling},
|
||||||
author={Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and others},
|
author={Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and others},
|
||||||
|
|
16
config.json
16
config.json
|
@ -65,16 +65,16 @@
|
||||||
"summary_proj_to_labels": true,
|
"summary_proj_to_labels": true,
|
||||||
"summary_type": "cls_index",
|
"summary_type": "cls_index",
|
||||||
"summary_use_proj": true,
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.5.0.dev0",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 50257,
|
||||||
|
"window_size": 256,
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
"task_specific_params": {
|
"task_specific_params": {
|
||||||
"text-generation": {
|
"text-generation": {
|
||||||
"do_sample": true,
|
"do_sample": true,
|
||||||
"max_length": 50,
|
"temperature": 0.9,
|
||||||
"temperature": 0.9
|
"max_length": 50
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
"tokenizer_class": "GPT2Tokenizer",
|
|
||||||
"transformers_version": "4.9.0.dev0",
|
|
||||||
"use_cache": true,
|
|
||||||
"vocab_size": 50257,
|
|
||||||
"window_size": 256
|
|
||||||
}
|
}
|
||||||
|
|
BIN
flax_model.msgpack (Stored with Git LFS)
BIN
flax_model.msgpack (Stored with Git LFS)
Binary file not shown.
BIN
rust_model.ot (Stored with Git LFS)
BIN
rust_model.ot (Stored with Git LFS)
Binary file not shown.
Loading…
Reference in New Issue