From ac50ab9be531fd6c6794ccbacfedb651abcf289b Mon Sep 17 00:00:00 2001 From: jianjiang Date: Fri, 21 Apr 2023 16:01:08 +0800 Subject: [PATCH] add tokenizer --- app.py | 12 ++++++------ requirements.txt | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 439f5f3..e0e0a72 100644 --- a/app.py +++ b/app.py @@ -3,12 +3,6 @@ from languages import LANGUANGE_MAP import gradio as gr import torch from gradio.themes.utils import sizes - -#model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base" -model_ckpt = "papluca/xlm-roberta-base-language-detection" -model = AutoModelForSequenceClassification.from_pretrained(model_ckpt) -tokenizer = AutoTokenizer.from_pretrained(model_ckpt) - theme = gr.themes.Default(radius_size=sizes.radius_none).set( block_label_text_color = '#4D63FF', block_title_text_color = '#4D63FF', @@ -18,6 +12,12 @@ theme = gr.themes.Default(radius_size=sizes.radius_none).set( button_primary_background_fill_hover='#EDEFFF', ) +#model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base" +model_ckpt = "papluca/xlm-roberta-base-language-detection" +model = AutoModelForSequenceClassification.from_pretrained(model_ckpt) +tokenizer = AutoTokenizer.from_pretrained(model_ckpt) + + def detect_language(sentence): tokenized_sentence = tokenizer(sentence, return_tensors='pt') diff --git a/requirements.txt b/requirements.txt index f8e1863..d354402 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ gradio transformers torch +sentencepiece