Compare commits
10 Commits
6d4dae8314
...
0368c75d05
Author | SHA1 | Date |
---|---|---|
|
0368c75d05 | |
|
161c56eeb8 | |
|
e46fcf6d4b | |
|
11d443d60a | |
|
3cfeb44473 | |
|
05fb3e9734 | |
|
19c5f93414 | |
|
9a92a9cb04 | |
|
94b8471b9a | |
|
21449a4aa4 |
|
@ -6,3 +6,4 @@
|
||||||
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
---
|
||||||
|
language:
|
||||||
|
- multilingual
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- squad
|
||||||
|
- arcd
|
||||||
|
- xquad
|
||||||
|
---
|
||||||
|
|
||||||
|
# Multilingual BERT fine-tuned on SQuADv1.1
|
||||||
|
|
||||||
|
[**WandB run link**](https://wandb.ai/salti/mBERT_QA/runs/wkqzhrp2)
|
||||||
|
|
||||||
|
**GPU**: Tesla P100-PCIE-16GB
|
||||||
|
|
||||||
|
## Training Arguments
|
||||||
|
|
||||||
|
```python
|
||||||
|
max_seq_length = 512
|
||||||
|
doc_stride = 256
|
||||||
|
max_answer_length = 64
|
||||||
|
batch_size = 16
|
||||||
|
gradient_accumulation_steps = 2
|
||||||
|
learning_rate = 5e-5
|
||||||
|
weight_decay = 3e-7
|
||||||
|
num_train_epochs = 3
|
||||||
|
warmup_ratio = 0.1
|
||||||
|
fp16 = True
|
||||||
|
fp16_opt_level = "O1"
|
||||||
|
seed = 0
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results
|
||||||
|
|
||||||
|
| EM | F1 |
|
||||||
|
| :----: | :----: |
|
||||||
|
| 81.731 | 89.009 |
|
||||||
|
|
||||||
|
## Zero-shot performance
|
||||||
|
|
||||||
|
### on ARCD
|
||||||
|
|
||||||
|
| EM | F1 |
|
||||||
|
| :----: | :----: |
|
||||||
|
| 20.655 | 48.051 |
|
||||||
|
|
||||||
|
### on XQuAD
|
||||||
|
|
||||||
|
| Language | EM | F1 |
|
||||||
|
| :--------: | :----: | :----: |
|
||||||
|
| Arabic | 42.185 | 57.803 |
|
||||||
|
| English | 73.529 | 85.01 |
|
||||||
|
| German | 55.882 | 72.555 |
|
||||||
|
| Greek | 45.21 | 62.207 |
|
||||||
|
| Spanish | 58.067 | 76.406 |
|
||||||
|
| Hindi | 40.588 | 55.29 |
|
||||||
|
| Russian | 55.126 | 71.617 |
|
||||||
|
| Thai | 26.891 | 39.965 |
|
||||||
|
| Turkish | 34.874 | 51.138 |
|
||||||
|
| Vietnamese | 47.983 | 68.125 |
|
||||||
|
| Chinese | 47.395 | 58.928 |
|
|
@ -1,9 +1,11 @@
|
||||||
{
|
{
|
||||||
|
"_name_or_path": "bert-base-multilingual-cased",
|
||||||
"architectures": [
|
"architectures": [
|
||||||
"BertForQuestionAnswering"
|
"BertForQuestionAnswering"
|
||||||
],
|
],
|
||||||
"attention_probs_dropout_prob": 0.1,
|
"attention_probs_dropout_prob": 0.1,
|
||||||
"directionality": "bidi",
|
"directionality": "bidi",
|
||||||
|
"gradient_checkpointing": false,
|
||||||
"hidden_act": "gelu",
|
"hidden_act": "gelu",
|
||||||
"hidden_dropout_prob": 0.1,
|
"hidden_dropout_prob": 0.1,
|
||||||
"hidden_size": 768,
|
"hidden_size": 768,
|
||||||
|
@ -20,6 +22,9 @@
|
||||||
"pooler_num_fc_layers": 3,
|
"pooler_num_fc_layers": 3,
|
||||||
"pooler_size_per_head": 128,
|
"pooler_size_per_head": 128,
|
||||||
"pooler_type": "first_token_transform",
|
"pooler_type": "first_token_transform",
|
||||||
|
"position_embedding_type": "absolute",
|
||||||
|
"transformers_version": "4.4.0.dev0",
|
||||||
"type_vocab_size": 2,
|
"type_vocab_size": 2,
|
||||||
|
"use_cache": true,
|
||||||
"vocab_size": 119547
|
"vocab_size": 119547
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
BIN
pytorch_model.bin (Stored with Git LFS)
BIN
pytorch_model.bin (Stored with Git LFS)
Binary file not shown.
Binary file not shown.
|
@ -1 +1 @@
|
||||||
{"do_lower_case": false, "model_max_length": 512, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
{"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "name_or_path": "bert-base-multilingual-cased"}
|
Binary file not shown.
Loading…
Reference in New Issue