add model src

songw committed 2023-04-10 16:14:17 +08:00
parent c04940b523    commit 75c65825b6
69 changed files with 1221 additions and 0 deletions

AnimeGANv2/app.py  (new file, 33 lines)
@@ -0,0 +1,33 @@
import gradio as gr
import torch
model2 = torch.hub.load(
"AK391/animegan2-pytorch:main",
"generator",
pretrained=True,
progress=False
)
model1 = torch.hub.load("AK391/animegan2-pytorch:main", "generator", pretrained="face_paint_512_v1")
face2paint = torch.hub.load(
'AK391/animegan2-pytorch:main', 'face2paint',
size=512,side_by_side=False
)
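# inference() picks the generator from the radio label: "version 2" favours robustness,
# "version 1" favours stylization; face2paint handles the pre-/post-processing.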
def inference(img, ver):
    if ver == 'version 2 (🔺 robustness,🔻 stylization)':
        out = face2paint(model2, img)
    else:
        out = face2paint(model1, img)
    return out
title = "动漫风格迁移"
examples = [['groot.jpeg', 'version 2 (🔺 robustness,🔻 stylization)'], ['gongyoo.jpeg', 'version 1 (🔺 stylization, 🔻 robustness)']]
demo = gr.Interface(
    fn=inference,
    inputs=[gr.inputs.Image(type="pil"), gr.inputs.Radio(['version 1 (🔺 stylization, 🔻 robustness)', 'version 2 (🔺 robustness,🔻 stylization)'], type="value", default='version 2 (🔺 robustness,🔻 stylization)', label='version')],
    outputs=gr.outputs.Image(type="pil"),
    title=title,
    examples=examples)
demo.launch(server_name="0.0.0.0", server_port=7022)

BIN  AnimeGANv2/gongyoo.jpeg  (new file, 18 KiB, binary not shown)

BIN  AnimeGANv2/groot.jpeg  (new file, 342 KiB, binary not shown)

ViT-B-SAM/app.py  (new file, 81 lines)
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io
sam_checkpoint = "sam_vit_b_01ec64.pth"
model_type = "vit_b"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
mask_generator = SamAutomaticMaskGenerator(sam)
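# A second generator with denser point sampling, stricter IoU/stability thresholds and
# crop-based post-processing; min_mask_region_area needs OpenCV for the cleanup step.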
mask_generator_2 = SamAutomaticMaskGenerator(
model=sam,
points_per_side=32,
pred_iou_thresh=0.86,
stability_score_thresh=0.92,
crop_n_layers=1,
crop_n_points_downscale_factor=2,
min_mask_region_area=100, # Requires open-cv to run post-processing
)
def fig2img(fig):
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img
def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))
def segment_image(image):
    image = image.astype('uint8')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)
    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')
    return fig2img(plt.gcf())
demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",
                    examples=['dog.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  ViT-B-SAM/dog.jpg  (new file, 98 KiB, binary not shown)

ViT-H-SAM/app.py  (new file, 81 lines)
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io
sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
mask_generator = SamAutomaticMaskGenerator(sam)
mask_generator_2 = SamAutomaticMaskGenerator(
model=sam,
points_per_side=32,
pred_iou_thresh=0.86,
stability_score_thresh=0.92,
crop_n_layers=1,
crop_n_points_downscale_factor=2,
min_mask_region_area=100, # Requires open-cv to run post-processing
)
def fig2img(fig):
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img
def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))
def segment_image(image):
    image = image.astype('uint8')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)
    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')
    return fig2img(plt.gcf())
demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",
                    examples=['dog.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  ViT-H-SAM/dog.jpg  (new file, 98 KiB, binary not shown)

ViT-L-SAM/app.py  (new file, 81 lines)
@@ -0,0 +1,81 @@
import gradio as gr
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
from PIL import Image
import io
sam_checkpoint = "sam_vit_l_0b3195.pth"
model_type = "vit_l"
device = "cuda"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)
mask_generator = SamAutomaticMaskGenerator(sam)
mask_generator_2 = SamAutomaticMaskGenerator(
model=sam,
points_per_side=32,
pred_iou_thresh=0.86,
stability_score_thresh=0.92,
crop_n_layers=1,
crop_n_points_downscale_factor=2,
min_mask_region_area=100, # Requires open-cv to run post-processing
)
def fig2img(fig):
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img
def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    for ann in sorted_anns:
        m = ann['segmentation']
        img = np.ones((m.shape[0], m.shape[1], 3))
        color_mask = np.random.random((1, 3)).tolist()[0]
        for i in range(3):
            img[:, :, i] = color_mask[i]
        ax.imshow(np.dstack((img, m * 0.35)))
def segment_image(image):
    image = image.astype('uint8')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    #masks = mask_generator.generate(image)
    masks2 = mask_generator_2.generate(image)
    plt.figure(figsize=(20, 20))
    plt.imshow(image)
    #show_anns(masks)
    show_anns(masks2)
    plt.axis('off')
    return fig2img(plt.gcf())
demo = gr.Interface(fn=segment_image,
                    inputs=gr.Image(),
                    outputs=gr.Image(),
                    title="图像分割",
                    examples=['dog.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  ViT-L-SAM/dog.jpg  (new file, 98 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,29 @@
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda")
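# Conditional captioning: the text prompt "a photography of" seeds the caption and
# generate() completes it; both the inputs and the model run in float16 on the GPU.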
def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7017)

BIN  (new image file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,29 @@
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda")
def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7018)

BIN  (new image file, 54 KiB, binary not shown)

blip-vqa-base/app.py  (new file, 27 lines)
@@ -0,0 +1,27 @@
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import gradio as gr
processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-base", torch_dtype=torch.float16).to("cuda")
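# BLIP VQA: the processor packs the image and the question into one encoding;
# generate() returns answer tokens that are decoded with special tokens stripped.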
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7021)

BIN  blip-vqa-base/soccer.jpg  (new file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,26 @@
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering
import gradio as gr
processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-capfilt-large")
model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-capfilt-large", torch_dtype=torch.float16).to("cuda")
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7025)

BIN  (new image file, 54 KiB, binary not shown)

blip2-opt-2.7b/app.py  (new file, 29 lines)
@@ -0,0 +1,29 @@
import torch
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import gradio as gr
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16).to("cuda")
def image2text(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    text = "a photography of"
    inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
demo = gr.Interface(fn=image2text,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7019)

BIN  blip2-opt-2.7b/soccer.jpg  (new file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,23 @@
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("merve/chatgpt-prompts-bart-long")
model = AutoModelForSeq2SeqLM.from_pretrained("merve/chatgpt-prompts-bart-long", from_tf=True)
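# The BART model expands a short role or topic (e.g. "photographer") into a longer
# ChatGPT-style prompt; max_new_tokens caps the length of the generated prompt.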
def generate(prompt):
    batch = tokenizer(prompt, return_tensors="pt")
    generated_ids = model.generate(batch["input_ids"], max_new_tokens=150)
    output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return output[0]
demo = gr.Interface(fn=generate,
                    inputs='text',
                    outputs='text',
                    title="generate prompt",
                    examples=[["photographer"], ["developer"]])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7020)

new file (path not shown)
@@ -0,0 +1,3 @@
tensorflow
transformers
torch

new file (path not shown)
@@ -0,0 +1,50 @@
from transformers import DonutProcessor, VisionEncoderDecoderModel
import gradio as gr
from PIL import Image
import torch
import re
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
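# Donut does OCR-free document VQA: the question is wrapped in the task prompt and fed
# as decoder_input_ids; token2json parses the generated sequence into structured output.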
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(inp, return_tensors="pt").pixel_values
    task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
    prompt = task_prompt.replace("{user_input}", question)
    decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    outputs = model.generate(pixel_values.to(device),
                             decoder_input_ids=decoder_input_ids.to(device),
                             max_length=model.decoder.config.max_position_embeddings,
                             early_stopping=True,
                             pad_token_id=processor.tokenizer.pad_token_id,
                             eos_token_id=processor.tokenizer.eos_token_id,
                             use_cache=True,
                             num_beams=1,
                             bad_words_ids=[[processor.tokenizer.unk_token_id]],
                             return_dict_in_generate=True,
                             output_scores=True)
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove the first task start token
    return processor.token2json(seq)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)

BIN  (new image file, 29 KiB, binary not shown)
BIN  (new image file, 24 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1 @@
pip install protobuf==3.20.*

new file (path not shown)
@@ -0,0 +1,26 @@
import gradio as gr
from transformers import pipeline
classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
def sentiment_analysis(text):
    results = classifier(text)
    total_result = ""
    for result in results[0]:
        total_result += f"Sentiment: {result.get('label')}, Score: {result.get('score'):.2f}"
        total_result += '\r\n'
    return total_result
demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

git-base-textvqa/app.py  (new file, 31 lines)
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from huggingface_hub import hf_hub_download
from PIL import Image
import gradio as gr
import torch
processor = AutoProcessor.from_pretrained("microsoft/git-base-textvqa")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-textvqa")
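# GIT treats VQA as text generation: the question tokens (with a leading CLS id) form
# the prefix, and generate() continues the sequence with the answer.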
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)
    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)

BIN  git-base-textvqa/soccer.jpg  (new file, 54 KiB, binary not shown)

git-base-vqav2/app.py  (new file, 31 lines)
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from huggingface_hub import hf_hub_download
from PIL import Image
import gradio as gr
import torch
processor = AutoProcessor.from_pretrained("microsoft/git-base-vqav2")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-vqav2")
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)
    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)

BIN  git-base-vqav2/soccer.jpg  (new file, 54 KiB, binary not shown)

git-large-vqav2/app.py  (new file, 31 lines)
@@ -0,0 +1,31 @@
from transformers import AutoProcessor, AutoModelForCausalLM
from huggingface_hub import hf_hub_download
from PIL import Image
import gradio as gr
import torch
processor = AutoProcessor.from_pretrained("microsoft/git-large-vqav2")
model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-vqav2")
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    pixel_values = processor(images=inp, return_tensors="pt").pixel_values
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    input_ids = torch.tensor(input_ids).unsqueeze(0)
    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7024)

BIN  git-large-vqav2/soccer.jpg  (new file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,25 @@
from transformers import pipeline
import gradio as gr
from PIL import Image
nlp = pipeline(
"document-question-answering",
model="impira/layoutlm-document-qa",
)
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    return nlp(inp, question)
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7025)

BIN  (new image file, 29 KiB, binary not shown)
BIN  (new image file, 24 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,2 @@
pytesseract
tesseract

new file (path not shown)
@@ -0,0 +1,35 @@
import gradio as gr
from transformers import AutoProcessor, LayoutLMv2ForQuestionAnswering, set_seed
import torch
from PIL import Image
set_seed(88)
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
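# Extractive document QA: the processor runs OCR internally (needs pytesseract/Tesseract),
# and the start/end logits select an answer span among the encoded document tokens.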
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    encoding = processor(inp, question, return_tensors="pt")
    outputs = model(**encoding)
    predicted_start_idx = outputs.start_logits.argmax(-1).item()
    predicted_end_idx = outputs.end_logits.argmax(-1).item()
    predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
    predicted_answer = processor.tokenizer.decode(predicted_answer_tokens)
    return predicted_answer
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)

BIN  (new image file, 29 KiB, binary not shown)
BIN  (new image file, 24 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,33 @@
import gradio as gr
import torch
from transformers import LayoutLMv3Processor, LayoutLMv3ForQuestionAnswering
from PIL import Image
processor = LayoutLMv3Processor.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa", apply_ocr=False)
model = LayoutLMv3ForQuestionAnswering.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa")
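# With apply_ocr=False the words and boxes must be supplied by the caller; here a single
# placeholder word and box stand in for real OCR output, so answers are illustrative only.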
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    context = ["Example"]
    boxes = [[0, 0, 1000, 1000]]  # one example bounding box covering the whole image
    document_encoding = processor(inp, question, context, boxes=boxes, return_tensors="pt")
    outputs = model(**document_encoding)
    start_idx = torch.argmax(outputs.start_logits, axis=1).item()
    end_idx = torch.argmax(outputs.end_logits, axis=1).item()
    answers = processor.tokenizer.decode(document_encoding["input_ids"][0][start_idx: end_idx + 1]).strip()
    return answers
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['income.png', 'What are the 2020 net sales?'], ['invoice.png', 'What is the invoice number?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7026)

BIN  (new image file, 29 KiB, binary not shown)
BIN  (new image file, 24 KiB, binary not shown)

resnet-50/app.py  (new file, 26 lines)
@@ -0,0 +1,26 @@
import gradio as gr
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
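# Standard image classification: preprocess, run a forward pass without gradients, and
# map the argmax logit to its ImageNet label via config.id2label.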
def image_classification(image):
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_label = logits.argmax(-1).item()
    return model.config.id2label[predicted_label]
demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs=gr.Label(num_top_classes=1),
                    title="图像分类",
                    examples=['dog.jpeg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  resnet-50/dog.jpeg  (new file, 53 KiB, binary not shown)

sd-vae-ft-mse/app.py  (new file, 23 lines)
@@ -0,0 +1,23 @@
from diffusers.models import AutoencoderKL
from diffusers import StableDiffusionPipeline
import gradio as gr
model = "CompVis/stable-diffusion-v1-4"
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
pipe = StableDiffusionPipeline.from_pretrained(model, vae=vae)
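# The fine-tuned MSE VAE replaces the pipeline's default autoencoder; note that no dtype
# or device is set, so this pipeline runs in float32 on CPU unless moved with pipe.to("cuda").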
def text2image(prompt):
    image = pipe(prompt).images[0]
    return image
demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7015)

new file (path not shown)
@@ -0,0 +1,29 @@
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
modelName="Seethal/sentiment_analysis_generic_dataset"
tokenizer = AutoTokenizer.from_pretrained(modelName)
model = AutoModelForSequenceClassification.from_pretrained(modelName)
sentimentPipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
Label2Des = {
"LABEL_0": "NEGATIVE",
"LABEL_1": "NEUTRAL",
"LABEL_2": "POSITIVE"
}
def sentiment_analysis(text):
    results = sentimentPipeline(text)
    return f"Sentiment: {Label2Des.get(results[0]['label'])}, Score: {results[0]['score']:.2f}"
demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

new file (path not shown)
@@ -0,0 +1,29 @@
import gradio as gr
import torch
import gc
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
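# max_split_size_mb reduces CUDA allocator fragmentation for the large UNet, and the
# DPM-Solver++ multistep scheduler reaches good quality in fewer denoising steps.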
pipe = pipe.to("cuda")
def text2image(prompt):
    image = pipe(prompt).images[0]
    return image
demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7013)

new file (path not shown)
@@ -0,0 +1,25 @@
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch
model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
def text2image(prompt):
    image = pipe(prompt).images[0]
    return image
demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7014)

new file (path not shown)
@@ -0,0 +1,25 @@
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch
model_id = "runwayml/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
def text2image(prompt):
    image = pipe(prompt).images[0]
    return image
demo = gr.Interface(fn=text2image,
                    inputs='text',
                    outputs='image',
                    title="text2image",
                    examples=['a photo of an astronaut riding a horse on mars'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7012)

t5-3b/app.py  (new file, 28 lines)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("t5-3b")
model = T5ForConditionalGeneration.from_pretrained("t5-3b")
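# T5 selects the task through a text prefix: "translate English to <language>: " routes
# the same checkpoint to German, French or Romanian translation.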
def translation(english, language):
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
demo = gr.Interface(fn=translation,
                    inputs=['text', gr.inputs.Radio(['German', 'French', 'Romanian'], type='value', default='German', label='language')],
                    outputs='text',
                    title="翻译"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

t5-base/app.py  (new file, 28 lines)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")
def translation(english, language):
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
demo = gr.Interface(fn=translation,
                    inputs=['text', gr.inputs.Radio(['German', 'French', 'Romanian'], type='value', default='German', label='language')],
                    outputs='text',
                    title="翻译"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

t5-large/app.py  (new file, 28 lines)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("t5-large")
model = T5ForConditionalGeneration.from_pretrained("t5-large")
def translation(english, language):
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
demo = gr.Interface(fn=translation,
                    inputs=['text', gr.inputs.Radio(['German', 'French', 'Romanian'], type='value', default='German', label='language')],
                    outputs='text',
                    title="翻译"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

t5-small/app.py  (new file, 28 lines)
@@ -0,0 +1,28 @@
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")
def translation(english, language):
    if language == 'German':
        input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids
    elif language == 'French':
        input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids
    else:
        input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
demo = gr.Interface(fn=translation,
                    inputs=['text', gr.inputs.Radio(['German', 'French', 'Romanian'], type='value', default='German', label='language')],
                    outputs='text',
                    title="翻译"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

new file (path not shown)
@@ -0,0 +1,26 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import requests
import gradio as gr
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
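# TrOCR: a ViT encoder reads the image and a text decoder generates the transcription;
# batch_decode returns the recognized string for the single input image.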
def ocr(image):
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text
demo = gr.Interface(fn=ocr,
                    inputs='image',
                    outputs='text',
                    title="ocr",
                    examples=['handwritten.jpeg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7010)

BIN  (new image file, 4.2 KiB, binary not shown)

trocr-base-printed/app.py  (new file, 26 lines)
@@ -0,0 +1,26 @@
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import requests
import gradio as gr
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')
def ocr(image):
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text
demo = gr.Interface(fn=ocr,
                    inputs='image',
                    outputs='text',
                    title="ocr",
                    examples=['printed.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7011)

BIN  (new image file, 3.7 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,22 @@
import gradio as gr
from transformers import pipeline
model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)
def sentiment_analysis(text):
    results = sentiment_task(text)
    return results
demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

new file (path not shown)
@@ -0,0 +1,22 @@
import gradio as gr
from transformers import pipeline
model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)
def sentiment_analysis(text):
    results = sentiment_task(text)
    return results
demo = gr.Interface(fn=sentiment_analysis,
                    inputs='text',
                    outputs='text',
                    title="文本情感分析"
                    )
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7028)

new file (path not shown)
@@ -0,0 +1,31 @@
from transformers import ViltProcessor, ViltForQuestionAnswering
import requests
from PIL import Image
import gradio as gr
import torch
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
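# ViLT casts VQA as classification over a fixed answer vocabulary: the argmax of the
# logits maps back to an answer string through config.id2label.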
def vqa(image, question):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    inputs = processor(inp, question, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    idx = logits.argmax(-1).item()
    return model.config.id2label[idx]
demo = gr.Interface(fn=vqa,
                    inputs=['image', 'text'],
                    outputs='text',
                    title="vqa",
                    examples=[['soccer.jpg', 'how many people in the picture?']])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7023)

BIN  (new image file, 54 KiB, binary not shown)

vit-age-classifier/app.py  (new file, 26 lines)
@@ -0,0 +1,26 @@
import gradio as gr
import torch
from transformers import ViTFeatureExtractor, ViTForImageClassification
model = ViTForImageClassification.from_pretrained('nateraw/vit-age-classifier')
transforms = ViTFeatureExtractor.from_pretrained('nateraw/vit-age-classifier')
def image_classification(image):
    inputs = transforms(image, return_tensors='pt')
    logits = model(**inputs).logits
    predicted_label = logits.argmax(-1).item()
    return model.config.id2label[predicted_label]
demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs='text',
                    title="年龄划分",
                    examples=['dog.jpeg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  vit-age-classifier/dog.jpeg  (new file, 53 KiB, binary not shown)

vit-gpt2-coco-en/app.py  (new file, 39 lines)
@@ -0,0 +1,39 @@
import torch
import requests
from PIL import Image
from transformers import ViTFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel
import gradio as gr
loc = "ydshieh/vit-gpt2-coco-en"
feature_extractor = ViTFeatureExtractor.from_pretrained(loc)
tokenizer = AutoTokenizer.from_pretrained(loc)
model = VisionEncoderDecoderModel.from_pretrained(loc)
model.eval()
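# Encoder-decoder captioning: the ViT feature extractor encodes the image and the GPT-2
# decoder generates a caption with beam search (num_beams=4, max_length=16).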
def predict(image):
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    with torch.no_grad():
        output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    total_caption = ""
    for pred in preds:
        total_caption = total_caption + pred.strip()
        total_caption = total_caption + "\r\n"
    return total_caption
demo = gr.Interface(fn=predict,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7019)

BIN  vit-gpt2-coco-en/soccer.jpg  (new file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,29 @@
from transformers import pipeline
import gradio as gr
import cv2
from PIL import Image
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
def ocr(image):
    inp = Image.fromarray(image.astype('uint8'), 'RGB')
    text = image_to_text(inp)
    total_caption = ""
    for caption in text:
        total_caption = total_caption + caption.get('generated_text')
        total_caption = total_caption + '\r\n'
    return total_caption
demo = gr.Interface(fn=ocr,
                    inputs='image',
                    outputs='text',
                    title="image2text",
                    examples=['soccer.jpg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0", server_port=7016)

BIN  (new image file, 54 KiB, binary not shown)

new file (path not shown)
@@ -0,0 +1,24 @@
import gradio as gr
from transformers import ViTImageProcessor, ViTForImageClassification
processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
# Note: the in21k checkpoint is pre-training only, so its classification head is not
# fine-tuned; a checkpoint such as 'google/vit-large-patch16-224' gives ImageNet labels.
model = ViTForImageClassification.from_pretrained('google/vit-large-patch16-224-in21k')
def image_classification(image):
    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_label = logits.argmax(-1).item()
    return model.config.id2label[predicted_label]
demo = gr.Interface(fn=image_classification,
                    inputs=gr.Image(),
                    outputs=gr.Label(num_top_classes=1),
                    title="图像分类",
                    examples=['dog.jpeg'])
if __name__ == "__main__":
    demo.queue(concurrency_count=3)
    demo.launch(server_name="0.0.0.0", server_port=7027)

BIN  (new image file, 53 KiB, binary not shown)