Update README.md
This commit is contained in:
parent
dfa909d6a9
commit
5a3287ec7c
|
@ -8,6 +8,7 @@ thumbnail: https://huggingface.co/front/thumbnails/microsoft.png
|
||||||
license: mit
|
license: mit
|
||||||
widget:
|
widget:
|
||||||
- text: "[CLS] I love you. [SEP] I like you. [SEP]"
|
- text: "[CLS] I love you. [SEP] I like you. [SEP]"
|
||||||
|
pipeline_tag: zero-shot-classification
|
||||||
---
|
---
|
||||||
|
|
||||||
## DeBERTa: Decoding-enhanced BERT with Disentangled Attention
|
## DeBERTa: Decoding-enhanced BERT with Disentangled Attention
|
||||||
|
@ -40,9 +41,7 @@ We present the dev results on SQuAD 1.1/2.0 and several GLUE benchmark tasks.
|
||||||
```bash
|
```bash
|
||||||
cd transformers/examples/text-classification/
|
cd transformers/examples/text-classification/
|
||||||
export TASK_NAME=mrpc
|
export TASK_NAME=mrpc
|
||||||
python -m torch.distributed.launch --nproc_per_node=8 run_glue.py --model_name_or_path microsoft/deberta-v2-xxlarge \
|
python -m torch.distributed.launch --nproc_per_node=8 run_glue.py --model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 4 \
--learning_rate 3e-6 --num_train_epochs 3 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir --sharded_ddp --fp16
|
||||||
--task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 4 \
|
|
||||||
--learning_rate 3e-6 --num_train_epochs 3 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir --sharded_ddp --fp16
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Citation
|
### Citation
|
||||||
|
|
Loading…
Reference in New Issue