"""Gradio demo that captions an uploaded image with a Hugging Face pipeline."""

from transformers import pipeline
import gradio as gr
import cv2
from PIL import Image

# Built once at import time so every request reuses the same pipeline object
# instead of re-creating it per call.
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def ocr(image) -> str:
    """Return the generated caption(s) for *image*, one per line.

    Despite the name, this does not perform character recognition: it feeds
    the image to the module-level ``image_to_text`` pipeline and collects the
    ``generated_text`` field of every result.

    Args:
        image: Array-like object exposing ``astype`` (assumed to be the
            H x W x 3 RGB array that Gradio's ``'image'`` input delivers --
            TODO confirm), or ``None`` when nothing was uploaded.

    Returns:
        Every generated caption followed by ``'\\r\\n'``, concatenated in the
        order the pipeline returned them. An empty string when *image* is
        ``None`` or the pipeline produced no captions.
    """
    # Gradio calls the handler with None when the user submits an empty
    # image component; answer with an empty caption instead of crashing.
    if image is None:
        return ""

    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    results = image_to_text(inp)

    # Entries lacking 'generated_text' are skipped: concatenating None to a
    # string would raise TypeError.
    texts = (result.get('generated_text') for result in results)
    return "".join(text + '\r\n' for text in texts if text is not None)


demo = gr.Interface(
    fn=ocr,
    inputs='image',
    outputs='text',
    title="image2text",
    examples=['soccer.jpg'],
)

if __name__ == "__main__":
    # NOTE(review): `concurrency_count` is a Gradio 3.x `queue()` argument;
    # Gradio 4 replaced it with `default_concurrency_limit` -- confirm the
    # pinned Gradio version before upgrading.
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7016)