add tokenizer
Build-Deploy-Actions Details

This commit is contained in:
jianjiang 2023-04-21 16:01:08 +08:00
parent 76bc815080
commit ac50ab9be5
2 changed files with 7 additions and 6 deletions

12
app.py
View File

@@ -3,12 +3,6 @@ from languages import LANGUANGE_MAP
import gradio as gr import gradio as gr
import torch import torch
from gradio.themes.utils import sizes from gradio.themes.utils import sizes
#model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
model_ckpt = "papluca/xlm-roberta-base-language-detection"
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
theme = gr.themes.Default(radius_size=sizes.radius_none).set( theme = gr.themes.Default(radius_size=sizes.radius_none).set(
block_label_text_color = '#4D63FF', block_label_text_color = '#4D63FF',
block_title_text_color = '#4D63FF', block_title_text_color = '#4D63FF',
@@ -18,6 +12,12 @@ theme = gr.themes.Default(radius_size=sizes.radius_none).set(
button_primary_background_fill_hover='#EDEFFF', button_primary_background_fill_hover='#EDEFFF',
) )
#model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
model_ckpt = "papluca/xlm-roberta-base-language-detection"
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
def detect_language(sentence): def detect_language(sentence):
tokenized_sentence = tokenizer(sentence, return_tensors='pt') tokenized_sentence = tokenizer(sentence, return_tensors='pt')

View File

@@ -1,3 +1,4 @@
gradio gradio
transformers transformers
torch torch
sentencepiece