add model src
@@ -0,0 +1,33 @@
import gradio as gr
import torch

# version 2 generator: more robust on real photos, weaker stylization
model2 = torch.hub.load(
    "AK391/animegan2-pytorch:main",
    "generator",
    pretrained=True,
    progress=False
)
# version 1 generator: stronger stylization, less robust
model1 = torch.hub.load("AK391/animegan2-pytorch:main", "generator", pretrained="face_paint_512_v1")
face2paint = torch.hub.load(
    'AK391/animegan2-pytorch:main', 'face2paint',
    size=512, side_by_side=False
)


def inference(img, ver):
    if ver == 'version 2 (🔺 robustness,🔻 stylization)':
        out = face2paint(model2, img)
    else:
        out = face2paint(model1, img)
    return out


title = "动漫风格迁移"  # "Anime style transfer"
examples = [['groot.jpeg', 'version 2 (🔺 robustness,🔻 stylization)'], ['gongyoo.jpeg', 'version 1 (🔺 stylization, 🔻 robustness)']]

demo = gr.Interface(
    fn=inference,
    inputs=[gr.Image(type="pil"),
            gr.Radio(['version 1 (🔺 stylization, 🔻 robustness)', 'version 2 (🔺 robustness,🔻 stylization)'],
                     value='version 2 (🔺 robustness,🔻 stylization)', label='version')],
    outputs=gr.Image(type="pil"),
    title=title,
    examples=examples)

demo.launch(server_name="0.0.0.0", server_port=7022)
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 342 KiB |
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io


sam_checkpoint = "sam_vit_b_01ec64.pth"
model_type = "vit_b"

device = "cuda"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# default automatic mask generator
mask_generator = SamAutomaticMaskGenerator(sam)

# denser point sampling plus crop-based postprocessing
mask_generator_2 = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=32,
    pred_iou_thresh=0.86,
    stability_score_thresh=0.92,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # requires OpenCV to run post-processing
)


def fig2img(fig):
    """Render a Matplotlib figure into a PIL image."""
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img


def show_anns(anns):
    """Overlay each mask on the current axes in a random translucent color."""
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))


def segment_image(image):
    # gradio already delivers an RGB array, so no BGR-to-RGB conversion is needed
    image = image.astype('uint8')
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)

    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')

    return fig2img(plt.gcf())


demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",  # "Image segmentation"
                    examples=['dog.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
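For reference, each record returned by SamAutomaticMaskGenerator.generate is a dict: per the segment-anything README it holds the boolean mask under 'segmentation' along with quality metadata ('area', 'bbox' in XYWH format, 'predicted_iou', 'stability_score'), which is what show_anns sorts and draws. A minimal sketch, assuming image is an RGB uint8 array:

masks = mask_generator.generate(image)
top = sorted(masks, key=lambda m: m['area'], reverse=True)[0]  # largest mask
print(top['bbox'], top['predicted_iou'], top['stability_score'])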
After Width: | Height: | Size: 98 KiB |
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io


sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"

device = "cuda"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# default automatic mask generator
mask_generator = SamAutomaticMaskGenerator(sam)

# denser point sampling plus crop-based postprocessing
mask_generator_2 = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=32,
    pred_iou_thresh=0.86,
    stability_score_thresh=0.92,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # requires OpenCV to run post-processing
)


def fig2img(fig):
    """Render a Matplotlib figure into a PIL image."""
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img


def show_anns(anns):
    """Overlay each mask on the current axes in a random translucent color."""
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))


def segment_image(image):
    # gradio already delivers an RGB array, so no BGR-to-RGB conversion is needed
    image = image.astype('uint8')
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)

    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')

    return fig2img(plt.gcf())


demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",  # "Image segmentation"
                    examples=['dog.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
After Width: | Height: | Size: 98 KiB |
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io


sam_checkpoint = "sam_vit_l_0b3195.pth"
model_type = "vit_l"

device = "cuda"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# default automatic mask generator
mask_generator = SamAutomaticMaskGenerator(sam)

# denser point sampling plus crop-based postprocessing
mask_generator_2 = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=32,
    pred_iou_thresh=0.86,
    stability_score_thresh=0.92,
    crop_n_layers=1,
    crop_n_points_downscale_factor=2,
    min_mask_region_area=100,  # requires OpenCV to run post-processing
)


def fig2img(fig):
    """Render a Matplotlib figure into a PIL image."""
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img


def show_anns(anns):
    """Overlay each mask on the current axes in a random translucent color."""
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))


def segment_image(image):
    # gradio already delivers an RGB array, so no BGR-to-RGB conversion is needed
    image = image.astype('uint8')
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)

    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')

    return fig2img(plt.gcf())


demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",  # "Image segmentation"
                    examples=['dog.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
After Width: | Height: | Size: 98 KiB |
@@ -0,0 +1,29 @@
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr


processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda")


def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    # conditional captioning: the model continues this prefix
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)


demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7017)
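The BLIP model card also demonstrates unconditional captioning: leaving out the text prefix lets the model produce a caption from scratch. A minimal variant of image2text under that assumption:

def image2text_unconditional(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, return_tensors="pt").to("cuda", torch.float16)  # no text prefix
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)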
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,29 @@
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr


processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda")


def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    # conditional captioning: the model continues this prefix
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)


demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7018)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,27 @@
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import gradio as gr


processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-base", torch_dtype=torch.float16).to("cuda")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7021)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,26 @@
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import gradio as gr


processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-capfilt-large")
model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-capfilt-large", torch_dtype=torch.float16).to("cuda")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7025)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,29 @@
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import gradio as gr


processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16).to("cuda")


def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    # conditional captioning: the model continues this prefix
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)


demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7019)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,23 @@
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


tokenizer = AutoTokenizer.from_pretrained("merve/chatgpt-prompts-bart-long")
model = AutoModelForSeq2SeqLM.from_pretrained("merve/chatgpt-prompts-bart-long", from_tf=True)


def generate(prompt):
    batch = tokenizer(prompt, return_tensors="pt")
    generated_ids = model.generate(batch["input_ids"], max_new_tokens=150)
    output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    return output[0]


demo = gr.Interface(fn=generate,
                    inputs='text',
                    outputs='text',
                    title="generate prompt",
                    examples=[["photographer"], ["developer"]])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7020)
@@ -0,0 +1,3 @@
tensorflow
transformers
torch
@@ -0,0 +1,50 @@
from transformers import DonutProcessor, VisionEncoderDecoderModel
import gradio as gr
from PIL import Image
import torch
import re


processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(inp, return_tensors="pt").pixel_values
    # Donut expects the DocVQA task prompt with its special tokens around the question
    task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
    prompt = task_prompt.replace("{user_input}", question)
    decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    outputs = model.generate(pixel_values.to(device),
                             decoder_input_ids=decoder_input_ids.to(device),
                             max_length=model.decoder.config.max_position_embeddings,
                             early_stopping=True,
                             pad_token_id=processor.tokenizer.pad_token_id,
                             eos_token_id=processor.tokenizer.eos_token_id,
                             use_cache=True,
                             num_beams=1,
                             bad_words_ids=[[processor.tokenizer.unk_token_id]],
                             return_dict_in_generate=True,
                             output_scores=True)

    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # strip the first task start token

    return processor.token2json(seq)


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)
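processor.token2json converts the generated sequence back into structured output; for the DocVQA checkpoint this is a dict with 'question' and 'answer' fields, so a caller could pull out just the answer. A sketch, assuming img is a numpy image as gradio provides:

result = vqa(img, "What is the invoice number?")
print(result.get("answer"))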
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 24 KiB |
@@ -0,0 +1 @@
pip install protobuf==3.20.*
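Assumption: this pin works around the TypeError ("Descriptors cannot not be created directly") that protobuf 4.x raises when importing modules generated by an older protoc; the 3.20.x line still accepts them.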
@@ -0,0 +1,26 @@
import gradio as gr
from transformers import pipeline


# return_all_scores=True yields a score for every emotion label
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)


def sentiment_analysis(text):
    results = classifier(text)
    total_result = ""

    # one line per emotion label with its score
    for result in results[0]:
        total_result += f"Sentiment: {result.get('label')}, Score: {result.get('score'):.2f}"
        total_result += '\r\n'

    return total_result


demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"  # "Text sentiment analysis"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import gradio as gr
import torch


processor = AutoProcessor.from_pretrained("microsoft/git-base-textvqa")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-textvqa")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values

    # GIT expects the question tokens prefixed with the CLS token
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)

    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import gradio as gr
import torch


processor = AutoProcessor.from_pretrained("microsoft/git-base-vqav2")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-vqav2")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values

    # GIT expects the question tokens prefixed with the CLS token
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)

    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import gradio as gr
import torch


processor = AutoProcessor.from_pretrained("microsoft/git-large-vqav2")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-vqav2")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values

    # GIT expects the question tokens prefixed with the CLS token
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)

    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,25 @@
from transformers import pipeline
import gradio as gr
from PIL import Image


nlp = pipeline(
    "document-question-answering",
    model="impira/layoutlm-document-qa",
)


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')

    return nlp(inp, question)


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7025)
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 24 KiB |
@@ -0,0 +1,2 @@
pytesseract
tesseract
@@ -0,0 +1,35 @@
import gradio as gr
from transformers import AutoProcessor, LayoutLMv2ForQuestionAnswering, set_seed
from PIL import Image


set_seed(88)
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    # the processor runs OCR (pytesseract) to extract words and boxes from the image
    encoding = processor(inp, question, return_tensors="pt")

    outputs = model(**encoding)
    predicted_start_idx = outputs.start_logits.argmax(-1).item()
    predicted_end_idx = outputs.end_logits.argmax(-1).item()

    predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
    predicted_answer = processor.tokenizer.decode(predicted_answer_tokens)

    return predicted_answer


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 24 KiB |
@@ -0,0 +1,33 @@
import gradio as gr
import torch
from transformers import LayoutLMv3Processor, LayoutLMv3ForQuestionAnswering
from PIL import Image


processor = LayoutLMv3Processor.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa", apply_ocr=False)
model = LayoutLMv3ForQuestionAnswering.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')

    # apply_ocr=False means words and boxes must be supplied by the caller;
    # this placeholder passes a single word with a box spanning the whole page.
    context = ["Example"]
    boxes = [[0, 0, 1000, 1000]]  # one normalized (0-1000) box per word
    document_encoding = processor(inp, question, context, boxes=boxes, return_tensors="pt")
    outputs = model(**document_encoding)

    start_idx = torch.argmax(outputs.start_logits, axis=1).item()
    end_idx = torch.argmax(outputs.end_logits, axis=1).item()
    answer = processor.tokenizer.decode(document_encoding["input_ids"][0][start_idx: end_idx + 1]).strip()

    return answer


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)
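Because apply_ocr=False, the placeholder context above will rarely contain the real answer; in practice the words and boxes would come from an OCR pass. A rough sketch with pytesseract (assumed available, as in the requirements file above):

import pytesseract
from pytesseract import Output

def ocr_words_and_boxes(pil_image):
    data = pytesseract.image_to_data(pil_image, output_type=Output.DICT)
    w, h = pil_image.size
    words, boxes = [], []
    for i, word in enumerate(data["text"]):
        if word.strip():
            words.append(word)
            x, y, bw, bh = data["left"][i], data["top"][i], data["width"][i], data["height"][i]
            # LayoutLM-family models expect boxes normalized to a 0-1000 grid
            boxes.append([int(1000 * x / w), int(1000 * y / h),
                          int(1000 * (x + bw) / w), int(1000 * (y + bh) / h)])
    return words, boxes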
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 24 KiB |
@@ -0,0 +1,26 @@
import gradio as gr
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch


processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")


def image_classification(image):
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_label = logits.argmax(-1).item()

    return model.config.id2label[predicted_label]


demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs=gr.Label(num_top_classes=1),
                    title="图像分类",  # "Image classification"
                    examples=['dog.jpeg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
After Width: | Height: | Size: 53 KiB |
@@ -0,0 +1,23 @@
from diffusers.models import AutoencoderKL
from diffusers import StableDiffusionPipeline
import gradio as gr

model = "CompVis/stable-diffusion-v1-4"
# swap in the MSE-fine-tuned VAE for smoother decoded images
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
pipe = StableDiffusionPipeline.from_pretrained(model, vae=vae)


def text2image(prompt):
    image = pipe(prompt).images[0]

    return image


demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7015)
@@ -0,0 +1,29 @@
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

modelName = "Seethal/sentiment_analysis_generic_dataset"

tokenizer = AutoTokenizer.from_pretrained(modelName)
model = AutoModelForSequenceClassification.from_pretrained(modelName)
sentimentPipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# map the model's raw label ids to readable names
Label2Des = {
    "LABEL_0": "NEGATIVE",
    "LABEL_1": "NEUTRAL",
    "LABEL_2": "POSITIVE"
}


def sentiment_analysis(text):
    results = sentimentPipeline(text)

    return f"Sentiment: {Label2Des.get(results[0]['label'])}, Score: {results[0]['score']:.2f}"


demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"  # "Text sentiment analysis"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,29 @@
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import os

# ask the CUDA caching allocator to split large blocks, reducing fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"

model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")


def text2image(prompt):
    image = pipe(prompt).images[0]

    return image


demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7013)
@@ -0,0 +1,25 @@
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch


model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")


def text2image(prompt):
    image = pipe(prompt).images[0]

    return image


demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7014)
@@ -0,0 +1,25 @@
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch


model_id = "runwayml/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")


def text2image(prompt):
    image = pipe(prompt).images[0]

    return image


demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7012)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration


tokenizer = T5Tokenizer.from_pretrained("t5-3b")
model = T5ForConditionalGeneration.from_pretrained("t5-3b")


def translation(english, language):
    # T5 selects the task through a text prefix
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(fn=translation,
                    inputs=['text', gr.Radio(['German', 'French', 'Romanian'], value='German', label='language')],
                    outputs='text',
                    title="翻译"  # "Translation"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
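T5 frames every task as text-to-text, so the only thing the three branches change is the task prefix. A quick sanity check, with the expected output taken from the T5 documentation:

input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))  # Das Haus ist wunderbar.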
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration


tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")


def translation(english, language):
    # T5 selects the task through a text prefix
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(fn=translation,
                    inputs=['text', gr.Radio(['German', 'French', 'Romanian'], value='German', label='language')],
                    outputs='text',
                    title="翻译"  # "Translation"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration


tokenizer = T5Tokenizer.from_pretrained("t5-large")
model = T5ForConditionalGeneration.from_pretrained("t5-large")


def translation(english, language):
    # T5 selects the task through a text prefix
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(fn=translation,
                    inputs=['text', gr.Radio(['German', 'French', 'Romanian'], value='German', label='language')],
                    outputs='text',
                    title="翻译"  # "Translation"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration


tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")


def translation(english, language):
    # T5 selects the task through a text prefix
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


demo = gr.Interface(fn=translation,
                    inputs=['text', gr.Radio(['German', 'French', 'Romanian'], value='German', label='language')],
                    outputs='text',
                    title="翻译"  # "Translation"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,26 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import gradio as gr


processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')


def ocr(image):
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return generated_text


demo = gr.Interface(fn=ocr,
                    inputs='image',
                    outputs='text',
                    title="ocr",
                    examples=['handwritten.jpeg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7010)
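Note that TrOCR checkpoints are trained on single text-line images (IAM for the handwritten model), so multi-line scans should be segmented into lines before being passed to ocr.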
After Width: | Height: | Size: 4.2 KiB |
@@ -0,0 +1,26 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import gradio as gr


processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')


def ocr(image):
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return generated_text


demo = gr.Interface(fn=ocr,
                    inputs='image',
                    outputs='text',
                    title="ocr",
                    examples=['printed.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7011)
After Width: | Height: | Size: 3.7 KiB |
@@ -0,0 +1,22 @@
import gradio as gr
from transformers import pipeline


model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)


def sentiment_analysis(text):
    results = sentiment_task(text)

    return results


demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"  # "Text sentiment analysis"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,22 @@
import gradio as gr
from transformers import pipeline


model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)


def sentiment_analysis(text):
    results = sentiment_task(text)

    return results


demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"  # "Text sentiment analysis"
                    )


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)
@@ -0,0 +1,31 @@
from transformers import ViltProcessor, ViltForQuestionAnswering
from PIL import Image
import gradio as gr


processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")


def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt")

    # ViLT treats VQA as classification over a fixed answer vocabulary
    outputs = model(**inputs)
    logits = outputs.logits
    idx = logits.argmax(-1).item()

    return model.config.id2label[idx]


demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'How many people are in the picture?']])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7023)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,26 @@
import gradio as gr
from transformers import ViTFeatureExtractor, ViTForImageClassification


model = ViTForImageClassification.from_pretrained('nateraw/vit-age-classifier')
transforms = ViTFeatureExtractor.from_pretrained('nateraw/vit-age-classifier')


def image_classification(image):
    inputs = transforms(image, return_tensors='pt')
    logits = model(**inputs).logits

    predicted_label = logits.argmax(-1).item()
    return model.config.id2label[predicted_label]


demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs='text',
                    title="年龄划分",  # "Age classification"
                    examples=['dog.jpeg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
After Width: | Height: | Size: 53 KiB |
@@ -0,0 +1,39 @@
import torch
from transformers import ViTFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel
import gradio as gr


loc = "ydshieh/vit-gpt2-coco-en"

feature_extractor = ViTFeatureExtractor.from_pretrained(loc)
tokenizer = AutoTokenizer.from_pretrained(loc)
model = VisionEncoderDecoderModel.from_pretrained(loc)
model.eval()


def predict(image):
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values

    with torch.no_grad():
        output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences

    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)

    # one caption per generated sequence
    total_caption = ""
    for pred in preds:
        total_caption = total_caption + pred.strip()
        total_caption = total_caption + "\r\n"

    return total_caption


demo = gr.Interface(fn=predict,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7019)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,29 @@
from transformers import pipeline
import gradio as gr
from PIL import Image


image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    text = image_to_text(inp)

    # concatenate the generated captions, one per line
    total_caption = ""
    for caption in text:
        total_caption = total_caption + caption.get('generated_text')
        total_caption = total_caption + '\r\n'

    return total_caption


demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7016)
After Width: | Height: | Size: 54 KiB |
@@ -0,0 +1,24 @@
import gradio as gr
from transformers import ViTImageProcessor, ViTForImageClassification

# Note: google/vit-large-patch16-224-in21k ships without a fine-tuned
# classification head (ViTModel outputs no logits), so this demo assumes
# the ImageNet-fine-tuned checkpoint instead.
processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-large-patch16-224')


def image_classification(image):
    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_label = logits.argmax(-1).item()

    return model.config.id2label[predicted_label]


demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs=gr.Label(num_top_classes=1),
                    title="图像分类",  # "Image classification"
                    examples=['dog.jpeg'])


if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)
After Width: | Height: | Size: 53 KiB |