commit da316b9c48ade7b50ac51066282d055303336b5e
Author: jianjiang
Date:   Fri Apr 21 14:35:01 2023 +0800

    ok

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..671699b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.8.13
+
+WORKDIR /app
+
+COPY . /app
+
+RUN pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple
+RUN pip install -r requirements.txt
+
+CMD ["python", "app.py"]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..fdade31
--- /dev/null
+++ b/app.py
@@ -0,0 +1,39 @@
+from transformers import ViltProcessor, ViltForQuestionAnswering
+from PIL import Image
+import gradio as gr
+import torch
+from gradio.themes.utils import sizes
+
+theme = gr.themes.Default(radius_size=sizes.radius_none).set(
+    block_label_text_color='#4D63FF',
+    block_title_text_color='#4D63FF',
+    button_primary_text_color='#4D63FF',
+    button_primary_background_fill='#FFFFFF',
+    button_primary_border_color='#4D63FF',
+    button_primary_background_fill_hover='#EDEFFF',
+)
+processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
+
+
+def vqa(image, question):
+    inp = Image.fromarray(image.astype('uint8'), 'RGB')
+    inputs = processor(inp, question, return_tensors="pt")
+
+    outputs = model(**inputs)
+    logits = outputs.logits
+    idx = logits.argmax(-1).item()
+
+    return model.config.id2label[idx]
+
+
+demo = gr.Interface(fn=vqa,
+                    inputs=['image', 'text'],
+                    outputs='text',
+                    title="vqa",
+                    theme=theme,
+                    examples=[['soccer.jpg', 'how many people in the picture?']])
+
+
+if __name__ == "__main__":
+    demo.queue(concurrency_count=3).launch()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c77de10
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+gradio
+transformers
+torch
+Pillow
diff --git a/soccer.jpg b/soccer.jpg
new file mode 100644
index 0000000..3f2ec82
Binary files /dev/null and b/soccer.jpg differ