Compare commits
10 Commits
58670260a4
...
cc63721791
Author | SHA1 | Date |
---|---|---|
|
cc63721791 | |
|
9f8b2203b6 | |
|
df99764b46 | |
|
dbec8489a1 | |
|
72503a84f7 | |
|
ef590151cc | |
|
d0002b604e | |
|
32720699d9 | |
|
6de88b8686 | |
|
ea19a79cbb |
|
@ -15,3 +15,4 @@
|
||||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
|
53
README.md
53
README.md
|
@ -4,6 +4,8 @@ datasets:
|
||||||
- Jean-Baptiste/wikiner_fr
|
- Jean-Baptiste/wikiner_fr
|
||||||
widget:
|
widget:
|
||||||
- text: "Je m'appelle jean-baptiste et je vis à montréal"
|
- text: "Je m'appelle jean-baptiste et je vis à montréal"
|
||||||
|
- text: "george washington est allé à washington"
|
||||||
|
license: mit
|
||||||
---
|
---
|
||||||
|
|
||||||
# camembert-ner: model fine-tuned from camemBERT for NER task.
|
# camembert-ner: model fine-tuned from camemBERT for NER task.
|
||||||
|
@ -12,9 +14,20 @@ widget:
|
||||||
|
|
||||||
[camembert-ner] is a NER model that was fine-tuned from camemBERT on wikiner-fr dataset.
|
[camembert-ner] is a NER model that was fine-tuned from camemBERT on wikiner-fr dataset.
|
||||||
Model was trained on wikiner-fr dataset (~170 634 sentences).
|
Model was trained on wikiner-fr dataset (~170 634 sentences).
|
||||||
Model was validated on emails/chat data and surperformed other models on this type of data specifically.
|
Model was validated on emails/chat data and outperformed other models on this type of data specifically.
|
||||||
In particular, the model seems to work better on entities that don't start with an uppercase letter.
|
In particular, the model seems to work better on entities that don't start with an uppercase letter.
|
||||||
|
|
||||||
|
## Training data
|
||||||
|
Training data was classified as follows:
|
||||||
|
|
||||||
|
Abbreviation|Description
|
||||||
|
-|-
|
||||||
|
O |Outside of a named entity
|
||||||
|
MISC |Miscellaneous entity
|
||||||
|
PER |Person’s name
|
||||||
|
ORG |Organization
|
||||||
|
LOC |Location
|
||||||
|
|
||||||
|
|
||||||
## How to use camembert-ner with HuggingFace
|
## How to use camembert-ner with HuggingFace
|
||||||
|
|
||||||
|
@ -31,7 +44,7 @@ model = AutoModelForTokenClassification.from_pretrained("Jean-Baptiste/camembert
|
||||||
|
|
||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
|
|
||||||
nlp = pipeline('ner', model=model, tokenizer=tokenizer, grouped_entities=True)
|
nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
|
||||||
nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance de Steve Jobs à Los Altos en Californie par Steve Jobs, Steve Wozniak et Ronald Wayne14, puis constituée sous forme de société le 3 janvier 1977 à l'origine sous le nom d'Apple Computer, mais pour ses 30 ans et pour refléter la diversification de ses produits, le mot « computer » est retiré le 9 janvier 2015.")
|
nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance de Steve Jobs à Los Altos en Californie par Steve Jobs, Steve Wozniak et Ronald Wayne14, puis constituée sous forme de société le 3 janvier 1977 à l'origine sous le nom d'Apple Computer, mais pour ses 30 ans et pour refléter la diversification de ses produits, le mot « computer » est retiré le 9 janvier 2015.")
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,27 +94,23 @@ nlp("Apple est créée le 1er avril 1976 dans le garage de la maison d'enfance d
|
||||||
|
|
||||||
## Model performances (metric: seqeval)
|
## Model performances (metric: seqeval)
|
||||||
|
|
||||||
Global
|
Overall
|
||||||
```
|
|
||||||
'precision': 0.8859
|
precision|recall|f1
|
||||||
'recall': 0.8971
|
-|-|-
|
||||||
'f1': 0.8914
|
0.8859|0.8971|0.8914
|
||||||
```
|
|
||||||
|
|
||||||
By entity
|
By entity
|
||||||
```
|
|
||||||
'LOC': {'precision': 0.8905576596578294,
|
|
||||||
'recall': 0.900554675118859,
|
|
||||||
'f1': 0.8955282684352223},
|
|
||||||
'MISC': {'precision': 0.8175627240143369,
|
|
||||||
'recall': 0.8117437722419929,
|
|
||||||
'f1': 0.8146428571428571},
|
|
||||||
'ORG': {'precision': 0.8099480326651819,
|
|
||||||
'recall': 0.8265151515151515,
|
|
||||||
'f1': 0.8181477315335584},
|
|
||||||
'PER': {'precision': 0.9372509960159362,
|
|
||||||
'recall': 0.959812321501428,
|
|
||||||
'f1': 0.9483975005039308}
|
|
||||||
|
|
||||||
```
|
entity|precision|recall|f1
|
||||||
|
-|-|-|-
|
||||||
|
PER|0.9372|0.9598|0.9483
|
||||||
|
ORG|0.8099|0.8265|0.8181
|
||||||
|
LOC|0.8905|0.9005|0.8955
|
||||||
|
MISC|0.8175|0.8117|0.8146
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
For those who may be interested, here is a short article on how I used the results of this model to train an LSTM model for signature detection in emails:
|
||||||
|
https://medium.com/@jean-baptiste.polle/lstm-model-for-email-signature-detection-8e990384fefa
|
||||||
|
|
16
config.json
16
config.json
|
@ -12,19 +12,19 @@
|
||||||
"hidden_size": 768,
|
"hidden_size": 768,
|
||||||
"id2label": {
|
"id2label": {
|
||||||
"0": "O",
|
"0": "O",
|
||||||
"1": "LOC",
|
"1": "I-LOC",
|
||||||
"2": "PER",
|
"2": "I-PER",
|
||||||
"3": "MISC",
|
"3": "I-MISC",
|
||||||
"4": "ORG"
|
"4": "I-ORG"
|
||||||
},
|
},
|
||||||
"initializer_range": 0.02,
|
"initializer_range": 0.02,
|
||||||
"intermediate_size": 3072,
|
"intermediate_size": 3072,
|
||||||
"label2id": {
|
"label2id": {
|
||||||
"LOC": 1,
|
"I-LOC": 1,
|
||||||
"MISC": 3,
|
"I-MISC": 3,
|
||||||
"O": 0,
|
"O": 0,
|
||||||
"ORG": 4,
|
"I-ORG": 4,
|
||||||
"PER": 2
|
"I-PER": 2
|
||||||
},
|
},
|
||||||
"layer_norm_eps": 1e-05,
|
"layer_norm_eps": 1e-05,
|
||||||
"max_position_embeddings": 514,
|
"max_position_embeddings": 514,
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue