Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (#10)

- Add verifyToken field to verify evaluation results are produced by Hugging Face's automatic model evaluator (0a4d5e89fe6716df5491f488ce43ad0a9ee508a1)


Co-authored-by: Evaluation Bot <autoevaluator@users.noreply.huggingface.co>
This commit is contained in:
Philipp Schmid 2022-12-05 13:32:46 +00:00 committed by system
parent 18b336aea4
commit 20cced441c
1 changed files with 30 additions and 25 deletions

View File

@ -1,4 +1,3 @@
--- ---
language: en language: en
license: mit license: mit
@ -23,24 +22,24 @@ model-index:
name: 'SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization' name: 'SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization'
type: samsum type: samsum
metrics: metrics:
- name: Validation ROGUE-1 - type: rogue-1
type: rogue-1
value: 42.621 value: 42.621
- name: Validation ROGUE-2 name: Validation ROGUE-1
type: rogue-2 - type: rogue-2
value: 21.9825 value: 21.9825
- name: Validation ROGUE-L name: Validation ROGUE-2
type: rogue-l - type: rogue-l
value: 33.034 value: 33.034
- name: Test ROGUE-1 name: Validation ROGUE-L
type: rogue-1 - type: rogue-1
value: 41.3174 value: 41.3174
- name: Test ROGUE-2 name: Test ROGUE-1
type: rogue-2 - type: rogue-2
value: 20.8716 value: 20.8716
- name: Test ROGUE-L name: Test ROGUE-2
type: rogue-l - type: rogue-l
value: 32.1337 value: 32.1337
name: Test ROGUE-L
- task: - task:
type: summarization type: summarization
name: Summarization name: Summarization
@ -50,30 +49,36 @@ model-index:
config: samsum config: samsum
split: test split: test
metrics: metrics:
- name: ROUGE-1 - type: rouge
type: rouge
value: 41.3282 value: 41.3282
name: ROUGE-1
verified: true verified: true
- name: ROUGE-2 verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTYzNzZkZDUzOWQzNGYxYTJhNGE4YWYyZjA0NzMyOWUzMDNhMmVhYzY1YTM0ZTJhYjliNGE4MDZhMjhhYjRkYSIsInZlcnNpb24iOjF9.OOM6l3v5rJCndmUIJV-2SDh2NjbPo5IgQOSL-Ju1Gwbi1voL5amsDEDOelaqlUBE3n55KkUsMLZhyn66yWxZBQ
type: rouge - type: rouge
value: 20.8755 value: 20.8755
name: ROUGE-2
verified: true verified: true
- name: ROUGE-L verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMWZiODFiYWQzY2NmOTc5YjA3NTI0YzQ1MzQ0ODk2NjgyMmVlMjA5MjZiNTJkMGRmZGEzN2M3MDNkMjkxMDVhYSIsInZlcnNpb24iOjF9.b8cPk2-IL24La3Vd0hhtii4tRXujh5urAwy6IVeTWHwYfXaURyC2CcQOWtlOx5bdO5KACeaJFrFBCGgjk-VGCQ
type: rouge - type: rouge
value: 32.1353 value: 32.1353
name: ROUGE-L
verified: true verified: true
- name: ROUGE-LSUM verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWNmYzdiYWQ2ZWRkYzRiMGMxNWUwODgwZTdkY2NjZTc1NWE5NTFiMzU0OTU1N2JjN2ExYWQ2NGZkNjk5OTc4YSIsInZlcnNpb24iOjF9.Fzv4p-TEVicljiCqsBJHK1GsnE_AwGqamVmxTPI0WBNSIhZEhliRGmIL_z1pDq6WOzv3GN2YUGvhowU7GxnyAQ
type: rouge - type: rouge
value: 38.401 value: 38.401
name: ROUGE-LSUM
verified: true verified: true
- name: loss verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNGI4MWY0NWMxMmQ0ODQ5MDhiNDczMDAzYzJkODBiMzgzYWNkMWM2YTZkZDJmNWJiOGQ3MmNjMGViN2UzYWI2ZSIsInZlcnNpb24iOjF9.7lw3h5k5lJ7tYFLZGUtLyDabFYd00l6ByhmvkW4fykocBy9Blyin4tdw4Xps4DW-pmrdMLgidHxBWz5MrSx1Bw
type: loss - type: loss
value: 1.4297215938568115 value: 1.4297215938568115
name: loss
verified: true verified: true
- name: gen_len verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzI0ZWNhNDM5YTViZDMyZGJjMDA1ZWFjYzNhOTdlOTFiNzhhMDBjNmM2MjA3ZmRkZjJjMjEyMGY3MzcwOTI2NyIsInZlcnNpb24iOjF9.oNaZsAtUDqGAqoZWJavlcW7PKx1AWsnkbhaQxadpOKk_u7ywJJabvTtzyx_DwEgZslgDETCf4MM-JKitZKjiDA
type: gen_len - type: gen_len
value: 60.0757 value: 60.0757
name: gen_len
verified: true verified: true
verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTgwYWYwMDRkNTJkMDM5N2I2MWNmYzQ3OWM1NDJmODUyZGViMGE4ZTdkNmIwYWM2N2VjZDNmN2RiMDE4YTYyYiIsInZlcnNpb24iOjF9.PbXTcNYX_SW-BuRQEcqyc21M7uKrOMbffQSAK6k2GLzTVRrzZxsDC57ktKL68zRY8fSiRGsnknOwv-nAR6YBCQ
--- ---
## `bart-large-cnn-samsum` ## `bart-large-cnn-samsum`