Merge branch 'main' of https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment into main
This commit is contained in:
commit
1f8684fe27
35
README.md
35
README.md
|
@ -1,19 +1,9 @@
|
|||
# twitter-XLM-roBERTa-base for Sentiment Analysis
|
||||
|
||||
This is an XLM-roBERTa-base model trained on ~198M tweets and finetuned for sentiment analysis in
|
||||
|
||||
|
||||
TODO: create model card
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.
|
||||
|
||||
- Paper: [_TweetEval_ benchmark (Findings of EMNLP 2020)](https://arxiv.org/pdf/2010.12421.pdf).
|
||||
- Git Repo: [Tweeteval official repository](https://github.com/cardiffnlp/tweeteval).
|
||||
- Paper: [XLM-T: A Multilingual Language Model Toolkit for Twitter](https://arxiv.org/abs/2104.12250).
|
||||
- Git Repo: [XLM-T official repository](https://github.com/cardiffnlp/xlm-t).
|
||||
|
||||
## Example of classification
|
||||
|
||||
|
@ -37,22 +27,17 @@ def preprocess(text):
|
|||
new_text.append(t)
|
||||
return " ".join(new_text)
|
||||
|
||||
# Tasks:
|
||||
# emoji, emotion, hate, irony, offensive, sentiment
|
||||
# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
|
||||
|
||||
task='sentiment'
|
||||
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
|
||||
|
||||
MODEL = f"cardiffnlp/twitter-xlm-roberta-base-sentiment"
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
||||
|
||||
# download label mapping
|
||||
labels=[]
|
||||
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
|
||||
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
|
||||
with urllib.request.urlopen(mapping_link) as f:
|
||||
html = f.read().decode('utf-8').split("\
|
||||
html = f.read().decode('utf-8').split("\n")
|
||||
csvreader = csv.reader(html, delimiter='\\t')
|
||||
csvreader = csv.reader(html, delimiter='\t')
|
||||
labels = [row[1] for row in csvreader if len(row) > 1]
|
||||
|
||||
# PT
|
||||
|
@ -88,8 +73,8 @@ for i in range(scores.shape[0]):
|
|||
Output:
|
||||
|
||||
```
|
||||
1) positive 0.8466
|
||||
2) neutral 0.1458
|
||||
3) negative 0.0076
|
||||
1) positive 0.76726073
|
||||
2) neutral 0.201
|
||||
3) negative 0.0312
|
||||
```
|
||||
|
||||
|
|
Loading…
Reference in New Issue