Merge branch 'main' of https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment into main

2021-04-17 05:57:04 +00:00 · 2021-04-17 05:57:04 +00:00 · 1f8684fe27
parent 891655a348 199ac16fae
commit 1f8684fe27
1 changed files with 10 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -1,19 +1,9 @@
 # twitter-XLM-roBERTa-base for Sentiment Analysis

+This is an XLM-roBERTa-base model trained on ~198M tweets and finetuned for sentiment analysis.

-
-TODO: create model card
-
-
-
-
-
-
-
-This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.
-
- Paper: [_TweetEval_ benchmark (Findings of EMNLP 2020)](https://arxiv.org/pdf/2010.12421.pdf). 
- Git Repo: [Tweeteval official repository](https://github.com/cardiffnlp/tweeteval).
+- Paper: [XLM-T: A Multilingual Language Model Toolkit for Twitter](https://...). 
+- Git Repo: [XLM-T official repository](https://github.com/cardiffnlp/xlm-t).

 ## Example of classification

@ -37,22 +27,17 @@ def preprocess(text):
        new_text.append(t)
    return " ".join(new_text)

-# Tasks:
-# emoji, emotion, hate, irony, offensive, sentiment
-# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
-
-task='sentiment'
-MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

+MODEL = f"cardiffnlp/twitter-xlm-roberta-base-sentiment"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)

 # download label mapping
 labels=[]
-mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
+mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
 with urllib.request.urlopen(mapping_link) as f:
-    html = f.read().decode('utf-8').split("\
+    html = f.read().decode('utf-8').split("\\
 ")
-    csvreader = csv.reader(html, delimiter='\\t')
+    csvreader = csv.reader(html, delimiter='\\\\t')
 labels = [row[1] for row in csvreader if len(row) > 1]

 # PT
@ -88,8 +73,8 @@ for i in range(scores.shape[0]):
 Output: 

 ```
-1) positive 0.8466
-2) neutral 0.1458
-3) negative 0.0076
+1) positive 0.76726073
+2) neutral 0.201
+3) negative 0.0312
 ```