From d8a6a968afe3c8be5d9418abac7f40941446e945 Mon Sep 17 00:00:00 2001 From: JB Polle <jbpolle83@gmail.com> Date: Fri, 12 Mar 2021 14:41:22 +0000 Subject: [PATCH 1/5] Create README.md --- README.md | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..1778ecb --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +language: fr +widget: +dataset: Jean-Baptiste/wikiner-fr +- text: "Je m'appelle Jean-Baptiste et je vis à Paris" +--- + +# camembert-ner: model fine-tuned from camemBERT for NER task. + +## Introduction + +[camembert-ner] is a NER model that was fine-tuned from camemBERT on wikiner-fr dataset + + +## How to use camembert-ner with HuggingFace + +##### Load camembert-ner and its sub-word tokenizer : + +```python +from transformers import AutoTokenizer, AutoModelForTokenClassification + +tokenizer = AutoTokenizer.from_pretrained("Jean-Baptiste/camembert-ner") +model = AutoModelForTokenClassification.from_pretrained("Jean-Baptiste/camembert-ner") + + +##### Process text sample (from wikipedia) + +from transformers import pipeline + +nlp = pipeline('ner', model=model, tokenizer=tokenizer, grouped_entities=True) +nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance de Steve Jobs à Los Altos en Californie par Steve Jobs, Steve Wozniak et Ronald Wayne14, puis constituée sous forme de société le 3 janvier 1977 à l'origine sous le nom d'Apple Computer, mais pour ses 30 ans et pour refléter la diversification de ses produits, le mot « computer » est retiré le 9 janvier 2015.") + + +[{'entity_group': 'ORG', + 'score': 0.9472818374633789, + 'word': 'Apple', + 'start': 0, + 'end': 5}, + {'entity_group': 'PER', + 'score': 0.9838564991950989, + 'word': 'Steve Jobs', + 'start': 74, + 'end': 85}, + {'entity_group': 'LOC', + 'score': 0.9831605950991312, + 'word': 'Los Altos', + 'start': 87, + 'end': 97}, + 
{'entity_group': 'LOC', + 'score': 0.9834540486335754, + 'word': 'Californie', + 'start': 100, + 'end': 111}, + {'entity_group': 'PER', + 'score': 0.9841555754343668, + 'word': 'Steve Jobs', + 'start': 115, + 'end': 126}, + {'entity_group': 'PER', + 'score': 0.9843501806259155, + 'word': 'Steve Wozniak', + 'start': 127, + 'end': 141}, + {'entity_group': 'PER', + 'score': 0.9841533899307251, + 'word': 'Ronald Wayne', + 'start': 144, + 'end': 157}, + {'entity_group': 'ORG', + 'score': 0.9468960364659628, + 'word': 'Apple Computer', + 'start': 243, + 'end': 257}] + +``` + + +## Authors + + + From cfd7e7b4dccf34c6de264479db3cc752ad6807f9 Mon Sep 17 00:00:00 2001 From: JB Polle <jbpolle83@gmail.com> Date: Fri, 12 Mar 2021 14:41:56 +0000 Subject: [PATCH 2/5] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 1778ecb..aeac655 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ --- language: fr widget: -dataset: Jean-Baptiste/wikiner-fr - text: "Je m'appelle Jean-Baptiste et je vis à Paris" --- From 3f61b3020c0c9bf6a9469b92ff6b62113c6aa5ba Mon Sep 17 00:00:00 2001 From: JB Polle <jbpolle83@gmail.com> Date: Fri, 12 Mar 2021 14:57:03 +0000 Subject: [PATCH 3/5] Update README.md --- README.md | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aeac655..f3f93ec 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ widget: ## Introduction -[camembert-ner] is a NER model that was fine-tuned from camemBERT on wikiner-fr dataset +[camembert-ner] is a NER model that was fine-tuned from camemBERT on wikiner-fr dataset. 
+The model was trained on a subset of the wikiner-fr dataset (~36 000 sentences). ## How to use camembert-ner with HuggingFace @@ -74,7 +75,26 @@ nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance d ``` -## Authors +## Model performances +Global +``` +'precision': 0.8852223816355811 +'recall': 0.8977156991124691 +'f1': 0.8914252690890703 +``` +By entity +``` +'PER': {'precision': 0.9367143476376246, + 'recall': 0.9583148558758315, + 'f1': 0.9473914949583516, + 'number': 2255}, +'MISC': {'precision': 0.831053901850362, + 'recall': 0.815955766192733, + 'f1': 0.823435631725787}, +'LOC': {'precision': 0.8701754385964913, + 'recall': 0.8878281622911695, + 'f1': 0.8789131718842291} + ``` From a7d6832a53771106d91559d5dfda3fb737082199 Mon Sep 17 00:00:00 2001 From: JB Polle <jbpolle83@gmail.com> Date: Fri, 12 Mar 2021 15:01:53 +0000 Subject: [PATCH 4/5] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f3f93ec..e1d62e7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ --- language: fr +datasets: +- Jean-Baptiste/wikiner_fr widget: - text: "Je m'appelle Jean-Baptiste et je vis à Paris" --- From bcb19a196125835dbf388ed127649b032f71a67c Mon Sep 17 00:00:00 2001 From: JB Polle <jbpolle83@gmail.com> Date: Fri, 12 Mar 2021 17:53:07 +0000 Subject: [PATCH 5/5] Update README.md --- README.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e1d62e7..28e5552 100644 --- a/README.md +++ b/README.md @@ -77,26 +77,28 @@ nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance d ``` -## Model performances +## Model performances (metric: seqeval) Global ``` -'precision': 0.8852223816355811 -'recall': 0.8977156991124691 -'f1': 0.8914252690890703 +'precision': 0.8830965723967158 +'recall': 0.8915789473684211 +'f1': 0.8873174883781837 ``` By entity ``` -'PER': {'precision': 0.9367143476376246, - 'recall': 
0.9583148558758315, - 'f1': 0.9473914949583516, - 'number': 2255}, -'MISC': {'precision': 0.831053901850362, - 'recall': 0.815955766192733, - 'f1': 0.823435631725787}, 'LOC': {'precision': 0.8701754385964913, 'recall': 0.8878281622911695, - 'f1': 0.8789131718842291} - ``` + 'f1': 0.8789131718842291}, +'MISC': {'precision': 0.831053901850362, + 'recall': 0.815955766192733, + 'f1': 0.823435631725787}, +'ORG': {'precision': 0.8620199146514936, + 'recall': 0.8335625859697386, + 'f1': 0.8475524475524475}, + 'PER': {'precision': 0.9367143476376246, + 'recall': 0.9583148558758315, + 'f1': 0.947391494958} + ```