Merge branch 'main' of https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment into main
This commit is contained in:
commit
1f8684fe27
35
README.md
35
README.md
|
@ -1,19 +1,9 @@
|
||||||
# twitter-XLM-roBERTa-base for Sentiment Analysis
|
# twitter-XLM-roBERTa-base for Sentiment Analysis
|
||||||
|
|
||||||
|
This is an XLM-roBERTa-base model trained on ~198M tweets and finetuned for sentiment analysis in
|
||||||
|
|
||||||
|
- Paper: [XLM-T: A Multilingual Language Model Toolkit for Twitter](https://arxiv.org/abs/2104.12250).
|
||||||
TODO: create model card
|
- Git Repo: [XLM-T official repository](https://github.com/cardiffnlp/xlm-t).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.
|
|
||||||
|
|
||||||
- Paper: [_TweetEval_ benchmark (Findings of EMNLP 2020)](https://arxiv.org/pdf/2010.12421.pdf).
|
|
||||||
- Git Repo: [Tweeteval official repository](https://github.com/cardiffnlp/tweeteval).
|
|
||||||
|
|
||||||
## Example of classification
|
## Example of classification
|
||||||
|
|
||||||
|
@ -37,22 +27,17 @@ def preprocess(text):
|
||||||
new_text.append(t)
|
new_text.append(t)
|
||||||
return " ".join(new_text)
|
return " ".join(new_text)
|
||||||
|
|
||||||
# Tasks:
|
|
||||||
# emoji, emotion, hate, irony, offensive, sentiment
|
|
||||||
# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
|
|
||||||
|
|
||||||
task='sentiment'
|
|
||||||
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
|
|
||||||
|
|
||||||
|
MODEL = f"cardiffnlp/twitter-xlm-roberta-base-sentiment"
|
||||||
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
||||||
|
|
||||||
# download label mapping
|
# download label mapping
|
||||||
labels=[]
|
labels=[]
|
||||||
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
|
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
|
||||||
with urllib.request.urlopen(mapping_link) as f:
|
with urllib.request.urlopen(mapping_link) as f:
|
||||||
html = f.read().decode('utf-8').split("\
|
html = f.read().decode('utf-8').split("\\
|
||||||
")
|
")
|
||||||
csvreader = csv.reader(html, delimiter='\\t')
|
csvreader = csv.reader(html, delimiter='\\\\t')
|
||||||
labels = [row[1] for row in csvreader if len(row) > 1]
|
labels = [row[1] for row in csvreader if len(row) > 1]
|
||||||
|
|
||||||
# PT
|
# PT
|
||||||
|
@ -88,8 +73,8 @@ for i in range(scores.shape[0]):
|
||||||
Output:
|
Output:
|
||||||
|
|
||||||
```
|
```
|
||||||
1) positive 0.8466
|
1) positive 0.76726073
|
||||||
2) neutral 0.1458
|
2) neutral 0.201
|
||||||
3) negative 0.0076
|
3) negative 0.0312
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue