task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
winogrande |
Winogrande XL (xl) |
xl |
validation |
a80f460359d1e9a67c006011c94de42a8759430c |
|
type |
value |
Accuracy |
53.67 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (en) |
en |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
59.23 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (fr) |
fr |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
53.01 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (jp) |
jp |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
52.45 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (pt) |
pt |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
53.61 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (ru) |
ru |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
53.97 |
|
|
|
task |
dataset |
metrics |
type |
Coreference resolution |
|
type |
name |
config |
split |
revision |
Muennighoff/xwinograd |
XWinograd (zh) |
zh |
test |
9dd5ea5505fad86b7bedad667955577815300cee |
|
type |
value |
Accuracy |
60.91 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
anli |
ANLI (r1) |
r1 |
validation |
9dbd830a06fea8b1c49d6e5ef2004a08d9f45094 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
anli |
ANLI (r2) |
r2 |
validation |
9dbd830a06fea8b1c49d6e5ef2004a08d9f45094 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
anli |
ANLI (r3) |
r3 |
validation |
9dbd830a06fea8b1c49d6e5ef2004a08d9f45094 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
super_glue |
SuperGLUE (cb) |
cb |
validation |
9e12063561e7e6c79099feb6d5a493142584e9e2 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
super_glue |
SuperGLUE (rte) |
rte |
validation |
9e12063561e7e6c79099feb6d5a493142584e9e2 |
|
type |
value |
Accuracy |
76.17 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (ar) |
ar |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
53.29 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (bg) |
bg |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
43.82 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (de) |
de |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
45.26 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (el) |
el |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
42.61 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (en) |
en |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
57.31 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (es) |
es |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
56.14 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (fr) |
fr |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
55.78 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (hi) |
hi |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
51.49 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (ru) |
ru |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
47.11 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (sw) |
sw |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
47.83 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (th) |
th |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
42.93 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (tr) |
tr |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
37.23 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (ur) |
ur |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
49.04 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (vi) |
vi |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
53.98 |
|
|
|
task |
dataset |
metrics |
type |
Natural language inference |
|
type |
name |
config |
split |
revision |
xnli |
XNLI (zh) |
zh |
validation |
a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16 |
|
type |
value |
Accuracy |
54.18 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
openai_humaneval |
HumanEval |
None |
test |
e8dc562f5de170c54b5481011dd9f4fa04845771 |
|
|
|
type |
value |
Pass@100 |
19.06 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
story_cloze |
StoryCloze (2016) |
2016 |
validation |
e724c6f8cdf7c7a2fb229d862226e15b023ee4db |
|
type |
value |
Accuracy |
87.33 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
super_glue |
SuperGLUE (copa) |
copa |
validation |
9e12063561e7e6c79099feb6d5a493142584e9e2 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (et) |
et |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (ht) |
ht |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (id) |
id |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (it) |
it |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (qu) |
qu |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (sw) |
sw |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (ta) |
ta |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (th) |
th |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (tr) |
tr |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (vi) |
vi |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
xcopa |
XCOPA (zh) |
zh |
validation |
37f73c60fb123111fa5af5f9b705d0b3747fd187 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (ar) |
ar |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
80.61 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (es) |
es |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (eu) |
eu |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
70.95 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (hi) |
hi |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
78.89 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (id) |
id |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
82.99 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (my) |
my |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (ru) |
ru |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
61.42 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (sw) |
sw |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
69.69 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (te) |
te |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
73.66 |
|
|
|
task |
dataset |
metrics |
|
type |
name |
config |
split |
revision |
Muennighoff/xstory_cloze |
XStoryCloze (zh) |
zh |
validation |
8bb76e594b68147f1a430e86829d07189622b90d |
|
type |
value |
Accuracy |
84.32 |
|
|
|