diff --git a/AnimeGANv2/app.py b/AnimeGANv2/app.py new file mode 100644 index 0000000..4d86403 --- /dev/null +++ b/AnimeGANv2/app.py @@ -0,0 +1,33 @@ +import gradio as gr +import torch + +model2 = torch.hub.load( + "AK391/animegan2-pytorch:main", + "generator", + pretrained=True, + progress=False +) +model1 = torch.hub.load("AK391/animegan2-pytorch:main", "generator", pretrained="face_paint_512_v1") +face2paint = torch.hub.load( + 'AK391/animegan2-pytorch:main', 'face2paint', + size=512,side_by_side=False +) + +def inference(img, ver): + if ver == 'version 2 (πŸ”Ί robustness,πŸ”» stylization)': + out = face2paint(model2, img) + else: + out = face2paint(model1, img) + return out + +title = "动漫风格迁移" +examples=[['groot.jpeg','version 2 (πŸ”Ί robustness,πŸ”» stylization)'],['gongyoo.jpeg','version 1 (πŸ”Ί stylization, πŸ”» robustness)']] + +demo = gr.Interface( + fn=inference, + inputs=[gr.inputs.Image(type="pil"),gr.inputs.Radio(['version 1 (πŸ”Ί stylization, πŸ”» robustness)','version 2 (πŸ”Ί robustness,πŸ”» stylization)'], type="value", default='version 2 (πŸ”Ί robustness,πŸ”» stylization)', label='version')], + outputs=gr.outputs.Image(type="pil"), + title=title, + examples=examples) + +demo.launch(server_name = "0.0.0.0", server_port = 7022) diff --git a/AnimeGANv2/gongyoo.jpeg b/AnimeGANv2/gongyoo.jpeg new file mode 100644 index 0000000..8f09a41 Binary files /dev/null and b/AnimeGANv2/gongyoo.jpeg differ diff --git a/AnimeGANv2/groot.jpeg b/AnimeGANv2/groot.jpeg new file mode 100644 index 0000000..06b192f Binary files /dev/null and b/AnimeGANv2/groot.jpeg differ diff --git a/ViT-B-SAM/app.py b/ViT-B-SAM/app.py new file mode 100644 index 0000000..ad5f37a --- /dev/null +++ b/ViT-B-SAM/app.py @@ -0,0 +1,81 @@ +import gradio as gr +import numpy as np +import torch +import matplotlib.pyplot as plt +import cv2 +import sys +sys.path.append("..") +from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor +from PIL import Image +import io + +sam_checkpoint = "sam_vit_b_01ec64.pth" +model_type = "vit_b" + +device = "cuda" + +sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) +sam.to(device=device) + +mask_generator = SamAutomaticMaskGenerator(sam) + +mask_generator_2 = SamAutomaticMaskGenerator( + model=sam, + points_per_side=32, + pred_iou_thresh=0.86, + stability_score_thresh=0.92, + crop_n_layers=1, + crop_n_points_downscale_factor=2, + min_mask_region_area=100, # Requires open-cv to run post-processing +) + + +def fig2img(fig): + buf = io.BytesIO() + fig.savefig(buf) + buf.seek(0) + img = Image.open(buf) + return img + +def show_anns(anns): + if len(anns) == 0: + return + sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in sorted_anns: + m = ann['segmentation'] + img = np.ones((m.shape[0], m.shape[1], 3)) + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack((img, m*0.35))) + + +def segment_image(image): + image = image.astype('uint8') + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + #masks = mask_generator.generate(image) + masks2 = mask_generator_2.generate(image) + + plt.figure(figsize=(20,20)) + plt.imshow(image) + #show_anns(masks) + show_anns(masks2) + plt.axis('off') + + return fig2img(plt.gcf()) + + +demo = gr.Interface(fn=segment_image, + inputs=gr.Image(), + outputs=gr.Image(), + title = "ε›Ύεƒεˆ†ε‰²", + examples = ['dog.jpg']) + + +if __name__ == "__main__": + 
demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/ViT-B-SAM/dog.jpg b/ViT-B-SAM/dog.jpg new file mode 100644 index 0000000..26d6454 Binary files /dev/null and b/ViT-B-SAM/dog.jpg differ diff --git a/ViT-H-SAM/app.py b/ViT-H-SAM/app.py new file mode 100644 index 0000000..dd759b0 --- /dev/null +++ b/ViT-H-SAM/app.py @@ -0,0 +1,81 @@ +import gradio as gr +import numpy as np +import torch +import matplotlib.pyplot as plt +import cv2 +import sys +sys.path.append("..") +from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor +from PIL import Image +import io + +sam_checkpoint = "sam_vit_h_4b8939.pth" +model_type = "vit_h" + +device = "cuda" + +sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) +sam.to(device=device) + +mask_generator = SamAutomaticMaskGenerator(sam) + +mask_generator_2 = SamAutomaticMaskGenerator( + model=sam, + points_per_side=32, + pred_iou_thresh=0.86, + stability_score_thresh=0.92, + crop_n_layers=1, + crop_n_points_downscale_factor=2, + min_mask_region_area=100, # Requires open-cv to run post-processing +) + + +def fig2img(fig): + buf = io.BytesIO() + fig.savefig(buf) + buf.seek(0) + img = Image.open(buf) + return img + +def show_anns(anns): + if len(anns) == 0: + return + sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in sorted_anns: + m = ann['segmentation'] + img = np.ones((m.shape[0], m.shape[1], 3)) + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack((img, m*0.35))) + + +def segment_image(image): + image = image.astype('uint8') + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + #masks = mask_generator.generate(image) + masks2 = mask_generator_2.generate(image) + + plt.figure(figsize=(20,20)) + plt.imshow(image) + #show_anns(masks) + show_anns(masks2) + plt.axis('off') + + return fig2img(plt.gcf()) + + +demo = gr.Interface(fn=segment_image, + inputs=gr.Image(), + outputs=gr.Image(), + title = "ε›Ύεƒεˆ†ε‰²", + examples = ['dog.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/ViT-H-SAM/dog.jpg b/ViT-H-SAM/dog.jpg new file mode 100644 index 0000000..26d6454 Binary files /dev/null and b/ViT-H-SAM/dog.jpg differ diff --git a/ViT-L-SAM/app.py b/ViT-L-SAM/app.py new file mode 100644 index 0000000..5da2bd1 --- /dev/null +++ b/ViT-L-SAM/app.py @@ -0,0 +1,81 @@ +import gradio as gr +import numpy as np +import torch +import matplotlib.pyplot as plt +import cv2 +import sys +sys.path.append("..") +from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor +from PIL import Image +import io + +sam_checkpoint = "sam_vit_l_0b3195.pth" +model_type = "vit_l" + +device = "cuda" + +sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) +sam.to(device=device) + +mask_generator = SamAutomaticMaskGenerator(sam) + +mask_generator_2 = SamAutomaticMaskGenerator( + model=sam, + points_per_side=32, + pred_iou_thresh=0.86, + stability_score_thresh=0.92, + crop_n_layers=1, + crop_n_points_downscale_factor=2, + min_mask_region_area=100, # Requires open-cv to run post-processing +) + + +def fig2img(fig): + buf = io.BytesIO() + fig.savefig(buf) + buf.seek(0) + img = Image.open(buf) + return img + +def show_anns(anns): + if len(anns) == 0: + return + sorted_anns = sorted(anns, key=(lambda x: 
x['area']), reverse=True) + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in sorted_anns: + m = ann['segmentation'] + img = np.ones((m.shape[0], m.shape[1], 3)) + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack((img, m*0.35))) + + +def segment_image(image): + image = image.astype('uint8') + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + #masks = mask_generator.generate(image) + masks2 = mask_generator_2.generate(image) + + plt.figure(figsize=(20,20)) + plt.imshow(image) + #show_anns(masks) + show_anns(masks2) + plt.axis('off') + + return fig2img(plt.gcf()) + + +demo = gr.Interface(fn=segment_image, + inputs=gr.Image(), + outputs=gr.Image(), + title = "ε›Ύεƒεˆ†ε‰²", + examples = ['dog.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/ViT-L-SAM/dog.jpg b/ViT-L-SAM/dog.jpg new file mode 100644 index 0000000..26d6454 Binary files /dev/null and b/ViT-L-SAM/dog.jpg differ diff --git a/blip-image-captioning-base/app.py b/blip-image-captioning-base/app.py new file mode 100644 index 0000000..d158d4c --- /dev/null +++ b/blip-image-captioning-base/app.py @@ -0,0 +1,29 @@ +import torch +import requests +from PIL import Image +from transformers import BlipProcessor, BlipForConditionalGeneration +import gradio as gr + + +processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") +model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda") + + +def image2text(image): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + text = "a photography of" + inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16) + out = model.generate(**inputs) + + return processor.decode(out[0], skip_special_tokens=True) + + +demo = gr.Interface(fn=image2text, + inputs='image', + outputs='text', + title = "image2text", + examples = ['soccer.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7017) diff --git a/blip-image-captioning-base/soccer.jpg b/blip-image-captioning-base/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/blip-image-captioning-base/soccer.jpg differ diff --git a/blip-image-captioning-large/app.py b/blip-image-captioning-large/app.py new file mode 100644 index 0000000..f26c94c --- /dev/null +++ b/blip-image-captioning-large/app.py @@ -0,0 +1,29 @@ +import torch +import requests +from PIL import Image +from transformers import BlipProcessor, BlipForConditionalGeneration +import gradio as gr + + +processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large") +model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large", torch_dtype=torch.float16).to("cuda") + + +def image2text(image): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + text = "a photography of" + inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16) + out = model.generate(**inputs) + + return processor.decode(out[0], skip_special_tokens=True) + + +demo = gr.Interface(fn=image2text, + inputs='image', + outputs='text', + title = "image2text", + examples = ['soccer.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7018) diff --git a/blip-image-captioning-large/soccer.jpg 
b/blip-image-captioning-large/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/blip-image-captioning-large/soccer.jpg differ diff --git a/blip-vqa-base/app.py b/blip-vqa-base/app.py new file mode 100644 index 0000000..cc0edbd --- /dev/null +++ b/blip-vqa-base/app.py @@ -0,0 +1,27 @@ +import torch +import requests +from PIL import Image +from transformers import BlipProcessor, BlipForQuestionAnswering +import gradio as gr + + +processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-base") +model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-base", torch_dtype=torch.float16).to("cuda") + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16) + out = model.generate(**inputs) + + return processor.decode(out[0], skip_special_tokens=True) + + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7021) diff --git a/blip-vqa-base/soccer.jpg b/blip-vqa-base/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/blip-vqa-base/soccer.jpg differ diff --git a/blip-vqa-capfilt-large/app.py b/blip-vqa-capfilt-large/app.py new file mode 100644 index 0000000..361eb33 --- /dev/null +++ b/blip-vqa-capfilt-large/app.py @@ -0,0 +1,26 @@ +import torch +import requests +from PIL import Image +from transformers import BlipProcessor, BlipForQuestionAnswering +import gradio as gr + + +processor = BlipProcessor.from_pretrained("ybelkada/blip-vqa-capfilt-large") +model = BlipForQuestionAnswering.from_pretrained("ybelkada/blip-vqa-capfilt-large", torch_dtype=torch.float16).to("cuda") + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + inputs = processor(inp, question, return_tensors="pt").to("cuda", torch.float16) + out = model.generate(**inputs) + + return processor.decode(out[0], skip_special_tokens=True) + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7025) diff --git a/blip-vqa-capfilt-large/soccer.jpg b/blip-vqa-capfilt-large/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/blip-vqa-capfilt-large/soccer.jpg differ diff --git a/blip2-opt-2.7b/app.py b/blip2-opt-2.7b/app.py new file mode 100644 index 0000000..665fa9c --- /dev/null +++ b/blip2-opt-2.7b/app.py @@ -0,0 +1,29 @@ +import torch +import requests +from PIL import Image +from transformers import Blip2Processor, Blip2ForConditionalGeneration +import gradio as gr + + +processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b") +model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16).to("cuda") + + +def image2text(image): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + text = "a photography of" + inputs = processor(inp, text, return_tensors="pt").to("cuda", torch.float16) + out = model.generate(**inputs) + + return processor.decode(out[0], skip_special_tokens=True) + + +demo = gr.Interface(fn=image2text, + inputs='image', + outputs='text', + title = "image2text", + examples = ['soccer.jpg']) + + +if 
__name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7019) diff --git a/blip2-opt-2.7b/soccer.jpg b/blip2-opt-2.7b/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/blip2-opt-2.7b/soccer.jpg differ diff --git a/chatgpt-prompts-bart-long/app.py b/chatgpt-prompts-bart-long/app.py new file mode 100644 index 0000000..b4b7790 --- /dev/null +++ b/chatgpt-prompts-bart-long/app.py @@ -0,0 +1,23 @@ +import gradio as gr +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + + +tokenizer = AutoTokenizer.from_pretrained("merve/chatgpt-prompts-bart-long") +model = AutoModelForSeq2SeqLM.from_pretrained("merve/chatgpt-prompts-bart-long", from_tf=True) + +def generate(prompt): + batch = tokenizer(prompt, return_tensors="pt") + generated_ids = model.generate(batch["input_ids"], max_new_tokens=150) + output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) + + return output[0] + +demo = gr.Interface(fn=generate, + inputs='text', + outputs='text', + title = "generate prompt", + examples = [["photographer"], ["developer"]]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7020) diff --git a/chatgpt-prompts-bart-long/requirements.txt b/chatgpt-prompts-bart-long/requirements.txt new file mode 100644 index 0000000..755b092 --- /dev/null +++ b/chatgpt-prompts-bart-long/requirements.txt @@ -0,0 +1,3 @@ +tensorflow +transformers +torch diff --git a/donut-base-finetuned-docvqa/app.py b/donut-base-finetuned-docvqa/app.py new file mode 100644 index 0000000..cd8d09b --- /dev/null +++ b/donut-base-finetuned-docvqa/app.py @@ -0,0 +1,50 @@ +from transformers import DonutProcessor, VisionEncoderDecoderModel +import gradio as gr +from PIL import Image +import torch +import re + + +processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa") +model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + pixel_values = processor(inp, return_tensors="pt").pixel_values + task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>" # DocVQA task prompt expected by this Donut checkpoint + prompt = task_prompt.replace("{user_input}", question) + decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt")["input_ids"] + + device = "cuda" if torch.cuda.is_available() else "cpu" + model.to(device) + + outputs = model.generate(pixel_values.to(device), + decoder_input_ids=decoder_input_ids.to(device), + max_length=model.decoder.config.max_position_embeddings, + early_stopping=True, + pad_token_id=processor.tokenizer.pad_token_id, + eos_token_id=processor.tokenizer.eos_token_id, + use_cache=True, + num_beams=1, + bad_words_ids=[[processor.tokenizer.unk_token_id]], + return_dict_in_generate=True, + output_scores=True) + + + seq = processor.batch_decode(outputs.sequences)[0] + seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "") + seq = re.sub(r"<.*?>", "", seq, count=1).strip() + + return processor.token2json(seq) + + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['income.png', 'What are the 2020 net sales?'], ['invoice.png','What is the invoice number?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7026) diff --git a/donut-base-finetuned-docvqa/income.png b/donut-base-finetuned-docvqa/income.png new
file mode 100644 index 0000000..3ffa17a Binary files /dev/null and b/donut-base-finetuned-docvqa/income.png differ diff --git a/donut-base-finetuned-docvqa/invoice.png b/donut-base-finetuned-docvqa/invoice.png new file mode 100644 index 0000000..78f9c64 Binary files /dev/null and b/donut-base-finetuned-docvqa/invoice.png differ diff --git a/donut-base-finetuned-docvqa/memo.txt b/donut-base-finetuned-docvqa/memo.txt new file mode 100644 index 0000000..6c3a629 --- /dev/null +++ b/donut-base-finetuned-docvqa/memo.txt @@ -0,0 +1 @@ +pip install protobuf==3.20.* diff --git a/emotion-english-distilroberta-base/app.py b/emotion-english-distilroberta-base/app.py new file mode 100644 index 0000000..8e763d1 --- /dev/null +++ b/emotion-english-distilroberta-base/app.py @@ -0,0 +1,26 @@ +import gradio as gr +from transformers import pipeline + + +classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True) + +def sentiment_analysis(text): + results = classifier(text) + total_result = "" + + for result in results[0]: + total_result += f"Sentiment: {result.get('label')}, Score: {result.get('score'):.2f}" + total_result += '\r\n' + + return total_result + +demo = gr.Interface(fn=sentiment_analysis, + inputs='text', + outputs='text', + title = "ζ–‡ζœ¬ζƒ…ζ„Ÿεˆ†ζž" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/git-base-textvqa/app.py b/git-base-textvqa/app.py new file mode 100644 index 0000000..5192cd5 --- /dev/null +++ b/git-base-textvqa/app.py @@ -0,0 +1,31 @@ +from transformers import AutoProcessor, AutoModelForCausalLM +from huggingface_hub import hf_hub_download +from PIL import Image +import gradio as gr +import torch + + +processor = AutoProcessor.from_pretrained("microsoft/git-base-textvqa") +model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-textvqa") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + pixel_values = processor(images=inp, return_tensors="pt").pixel_values + + input_ids = processor(text=question, add_special_tokens=False).input_ids + input_ids = [processor.tokenizer.cls_token_id] + input_ids + input_ids = torch.tensor(input_ids).unsqueeze(0) + + generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50) + return processor.batch_decode(generated_ids, skip_special_tokens=True) + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7024) diff --git a/git-base-textvqa/soccer.jpg b/git-base-textvqa/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/git-base-textvqa/soccer.jpg differ diff --git a/git-base-vqav2/app.py b/git-base-vqav2/app.py new file mode 100644 index 0000000..eff9fa0 --- /dev/null +++ b/git-base-vqav2/app.py @@ -0,0 +1,31 @@ +from transformers import AutoProcessor, AutoModelForCausalLM +from huggingface_hub import hf_hub_download +from PIL import Image +import gradio as gr +import torch + + +processor = AutoProcessor.from_pretrained("microsoft/git-base-vqav2") +model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-vqav2") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + pixel_values = processor(images=inp, 
return_tensors="pt").pixel_values + + input_ids = processor(text=question, add_special_tokens=False).input_ids + input_ids = [processor.tokenizer.cls_token_id] + input_ids + input_ids = torch.tensor(input_ids).unsqueeze(0) + + generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50) + return processor.batch_decode(generated_ids, skip_special_tokens=True) + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7024) diff --git a/git-base-vqav2/soccer.jpg b/git-base-vqav2/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/git-base-vqav2/soccer.jpg differ diff --git a/git-large-vqav2/app.py b/git-large-vqav2/app.py new file mode 100644 index 0000000..a60bc3b --- /dev/null +++ b/git-large-vqav2/app.py @@ -0,0 +1,31 @@ +from transformers import AutoProcessor, AutoModelForCausalLM +from huggingface_hub import hf_hub_download +from PIL import Image +import gradio as gr +import torch + + +processor = AutoProcessor.from_pretrained("microsoft/git-large-vqav2") +model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-vqav2") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + pixel_values = processor(images=inp, return_tensors="pt").pixel_values + + input_ids = processor(text=question, add_special_tokens=False).input_ids + input_ids = [processor.tokenizer.cls_token_id] + input_ids + input_ids = torch.tensor(input_ids).unsqueeze(0) + + generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=50) + return processor.batch_decode(generated_ids, skip_special_tokens=True) + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7024) diff --git a/git-large-vqav2/soccer.jpg b/git-large-vqav2/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/git-large-vqav2/soccer.jpg differ diff --git a/layoutlm-document-qa/app.py b/layoutlm-document-qa/app.py new file mode 100644 index 0000000..3061e1a --- /dev/null +++ b/layoutlm-document-qa/app.py @@ -0,0 +1,25 @@ +from transformers import pipeline +import gradio as gr +from PIL import Image + + +nlp = pipeline( + "document-question-answering", + model="impira/layoutlm-document-qa", +) + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + + return nlp(inp, question) + + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['income.png', 'What are the 2020 net sales?'], ['invoice.png','What is the invoice number?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7025) diff --git a/layoutlm-document-qa/income.png b/layoutlm-document-qa/income.png new file mode 100644 index 0000000..3ffa17a Binary files /dev/null and b/layoutlm-document-qa/income.png differ diff --git a/layoutlm-document-qa/invoice.png b/layoutlm-document-qa/invoice.png new file mode 100644 index 0000000..78f9c64 Binary files /dev/null and b/layoutlm-document-qa/invoice.png differ diff --git a/layoutlm-document-qa/requirements.txt 
b/layoutlm-document-qa/requirements.txt new file mode 100644 index 0000000..5bf20e6 --- /dev/null +++ b/layoutlm-document-qa/requirements.txt @@ -0,0 +1,2 @@ +pytesseract +tesseract diff --git a/layoutlmv2-base-uncased/app.py b/layoutlmv2-base-uncased/app.py new file mode 100644 index 0000000..cba8e55 --- /dev/null +++ b/layoutlmv2-base-uncased/app.py @@ -0,0 +1,35 @@ +import gradio as gr +from transformers import AutoProcessor, LayoutLMv2ForQuestionAnswering, set_seed +import torch +from PIL import Image + + +set_seed(88) +processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") +model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + encoding = processor(inp, question, return_tensors="pt") + + outputs = model(**encoding) + predicted_start_idx = outputs.start_logits.argmax(-1).item() + predicted_end_idx = outputs.end_logits.argmax(-1).item() + predicted_start_idx, predicted_end_idx + + predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1] + predicted_answer = processor.tokenizer.decode(predicted_answer_tokens) + + return predicted_answer + + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['income.png', 'What are the 2020 net sales?'], ['invoice.png','What is the invoice number?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7026) diff --git a/layoutlmv2-base-uncased/income.png b/layoutlmv2-base-uncased/income.png new file mode 100644 index 0000000..3ffa17a Binary files /dev/null and b/layoutlmv2-base-uncased/income.png differ diff --git a/layoutlmv2-base-uncased/invoice.png b/layoutlmv2-base-uncased/invoice.png new file mode 100644 index 0000000..78f9c64 Binary files /dev/null and b/layoutlmv2-base-uncased/invoice.png differ diff --git a/layoutlmv3-base-mpdocvqa/app.py b/layoutlmv3-base-mpdocvqa/app.py new file mode 100644 index 0000000..3a88aaa --- /dev/null +++ b/layoutlmv3-base-mpdocvqa/app.py @@ -0,0 +1,33 @@ +import gradio as gr +import torch +from transformers import LayoutLMv3Processor, LayoutLMv3ForQuestionAnswering +from PIL import Image + + +processor = LayoutLMv3Processor.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa", apply_ocr=False) +model = LayoutLMv3ForQuestionAnswering.from_pretrained("rubentito/layoutlmv3-base-mpdocvqa") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + + context = ["Example"] + boxes = [[0, 0, 1000, 1000]] # One example bounding box per context word, covering the whole image (apply_ocr=False expects a list of boxes).
+ document_encoding = processor(inp, question, context, boxes=boxes, return_tensors="pt") + outputs = model(**document_encoding) + + start_idx = torch.argmax(outputs.start_logits, axis=1).item() + end_idx = torch.argmax(outputs.end_logits, axis=1).item() + answers = processor.tokenizer.decode(document_encoding["input_ids"][0][start_idx: end_idx + 1]).strip() # decode the predicted answer span from the encoded inputs + + return answers + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['income.png', 'What are the 2020 net sales?'], ['invoice.png','What is the invoice number?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7026) diff --git a/layoutlmv3-base-mpdocvqa/income.png b/layoutlmv3-base-mpdocvqa/income.png new file mode 100644 index 0000000..3ffa17a Binary files /dev/null and b/layoutlmv3-base-mpdocvqa/income.png differ diff --git a/layoutlmv3-base-mpdocvqa/invoice.png b/layoutlmv3-base-mpdocvqa/invoice.png new file mode 100644 index 0000000..78f9c64 Binary files /dev/null and b/layoutlmv3-base-mpdocvqa/invoice.png differ diff --git a/resnet-50/app.py b/resnet-50/app.py new file mode 100644 index 0000000..3b38441 --- /dev/null +++ b/resnet-50/app.py @@ -0,0 +1,26 @@ +import gradio as gr +from transformers import AutoImageProcessor, ResNetForImageClassification +import torch + + +processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50") +model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50") + +def image_classification(image): + inputs = processor(image, return_tensors="pt") + with torch.no_grad(): + logits = model(**inputs).logits + predicted_label = logits.argmax(-1).item() + + return model.config.id2label[predicted_label] + +demo = gr.Interface(fn=image_classification, + inputs=gr.Image(), + outputs=gr.Label(num_top_classes=1), + title = "ε›Ύεƒεˆ†η±»", + examples = ['dog.jpeg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/resnet-50/dog.jpeg b/resnet-50/dog.jpeg new file mode 100644 index 0000000..f83318c Binary files /dev/null and b/resnet-50/dog.jpeg differ diff --git a/sd-vae-ft-mse/app.py b/sd-vae-ft-mse/app.py new file mode 100644 index 0000000..908d3c7 --- /dev/null +++ b/sd-vae-ft-mse/app.py @@ -0,0 +1,23 @@ +from diffusers.models import AutoencoderKL +from diffusers import StableDiffusionPipeline +import gradio as gr + +model = "CompVis/stable-diffusion-v1-4" +vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse") +pipe = StableDiffusionPipeline.from_pretrained(model, vae=vae) + +def text2image(prompt): + image = pipe(prompt).images[0] + + return image + + +demo = gr.Interface(fn=text2image, + inputs='text', + outputs='image', + title = "text2image", + examples = ['a photo of an astronaut riding a horse on mars']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7015) diff --git a/sentiment_analysis_generic_dataset/app.py b/sentiment_analysis_generic_dataset/app.py new file mode 100644 index 0000000..f5ca314 --- /dev/null +++ b/sentiment_analysis_generic_dataset/app.py @@ -0,0 +1,29 @@ +import gradio as gr +from transformers import pipeline, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification + +modelName="Seethal/sentiment_analysis_generic_dataset" + +tokenizer = AutoTokenizer.from_pretrained(modelName) +model = AutoModelForSequenceClassification.from_pretrained(modelName) +sentimentPipeline = pipeline("sentiment-analysis",
model=model, tokenizer=tokenizer) +Label2Des = { + "LABEL_0": "NEGATIVE", + "LABEL_1": "NEUTRAL", + "LABEL_2": "POSITIVE" +} + +def sentiment_analysis(text): + results = sentimentPipeline(text) + + return f"Sentiment: {Label2Des.get(results[0]['label'])}, Score: {results[0]['score']:.2f}" + +demo = gr.Interface(fn=sentiment_analysis, + inputs='text', + outputs='text', + title = "ζ–‡ζœ¬ζƒ…ζ„Ÿεˆ†ζž" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/stable-diffusion-2-1/app.py b/stable-diffusion-2-1/app.py new file mode 100644 index 0000000..315c8bc --- /dev/null +++ b/stable-diffusion-2-1/app.py @@ -0,0 +1,29 @@ +import gradio as gr +import torch +import gc +from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler +import os + +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32" + +model_id = "stabilityai/stable-diffusion-2-1" +pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) +pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) +pipe = pipe.to("cuda") + + +def text2image(prompt): + image = pipe(prompt).images[0] + + return image + + +demo = gr.Interface(fn=text2image, + inputs='text', + outputs='image', + title = "text2image", + examples = ['a photo of an astronaut riding a horse on mars']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7013) diff --git a/stable-diffusion-v1-4/app.py b/stable-diffusion-v1-4/app.py new file mode 100644 index 0000000..fa6a3a6 --- /dev/null +++ b/stable-diffusion-v1-4/app.py @@ -0,0 +1,25 @@ +import gradio as gr +from diffusers import StableDiffusionPipeline +import torch + + +model_id = "CompVis/stable-diffusion-v1-4" +pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) +pipe = pipe.to("cuda") + + +def text2image(prompt): + image = pipe(prompt).images[0] + + return image + + +demo = gr.Interface(fn=text2image, + inputs='text', + outputs='image', + title = "text2image", + examples = ['a photo of an astronaut riding a horse on mars']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7014) diff --git a/stable-diffusion-v1-5/app.py b/stable-diffusion-v1-5/app.py new file mode 100644 index 0000000..4178dfc --- /dev/null +++ b/stable-diffusion-v1-5/app.py @@ -0,0 +1,25 @@ +import gradio as gr +from diffusers import StableDiffusionPipeline +import torch + + +model_id = "runwayml/stable-diffusion-v1-5" +pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) +pipe = pipe.to("cuda") + + +def text2image(prompt): + image = pipe(prompt).images[0] + + return image + + +demo = gr.Interface(fn=text2image, + inputs='text', + outputs='image', + title = "text2image", + examples = ['a photo of an astronaut riding a horse on mars']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7012) diff --git a/t5-3b/app.py b/t5-3b/app.py new file mode 100644 index 0000000..35977fa --- /dev/null +++ b/t5-3b/app.py @@ -0,0 +1,28 @@ +import gradio as gr +from transformers import T5Tokenizer, T5ForConditionalGeneration + + +tokenizer = T5Tokenizer.from_pretrained("t5-3b") +model = T5ForConditionalGeneration.from_pretrained("t5-3b") + +def translation(english, language): + if language == 'German': + input_ids = tokenizer("translate English to German: " + english, 
return_tensors="pt").input_ids + elif language == 'French': + input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids + else: + input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids + outputs = model.generate(input_ids) + + return tokenizer.decode(outputs[0], skip_special_tokens=True) + +demo = gr.Interface(fn=translation, + inputs=['text', gr.inputs.Radio(['German','French','Romanian'], type='value', default='German', label='language')], + outputs='text', + title = "ηΏ»θ―‘" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/t5-base/app.py b/t5-base/app.py new file mode 100644 index 0000000..fc522c2 --- /dev/null +++ b/t5-base/app.py @@ -0,0 +1,28 @@ +import gradio as gr +from transformers import T5Tokenizer, T5ForConditionalGeneration + + +tokenizer = T5Tokenizer.from_pretrained("t5-base") +model = T5ForConditionalGeneration.from_pretrained("t5-base") + +def translation(english, language): + if language == 'German': + input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids + elif language == 'French': + input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids + else: + input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids + outputs = model.generate(input_ids) + + return tokenizer.decode(outputs[0], skip_special_tokens=True) + +demo = gr.Interface(fn=translation, + inputs=['text', gr.inputs.Radio(['German','French','Romanian'], type='value', default='German', label='language')], + outputs='text', + title = "ηΏ»θ―‘" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/t5-large/app.py b/t5-large/app.py new file mode 100644 index 0000000..5d29218 --- /dev/null +++ b/t5-large/app.py @@ -0,0 +1,28 @@ +import gradio as gr +from transformers import T5Tokenizer, T5ForConditionalGeneration + + +tokenizer = T5Tokenizer.from_pretrained("t5-large") +model = T5ForConditionalGeneration.from_pretrained("t5-large") + +def translation(english, language): + if language == 'German': + input_ids = tokenizer("translate English to German: " + english, return_tensors="pt").input_ids + elif language == 'French': + input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids + else: + input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids + outputs = model.generate(input_ids) + + return tokenizer.decode(outputs[0], skip_special_tokens=True) + +demo = gr.Interface(fn=translation, + inputs=['text', gr.inputs.Radio(['German','French','Romanian'], type='value', default='German', label='language')], + outputs='text', + title = "ηΏ»θ―‘" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/t5-small/app.py b/t5-small/app.py new file mode 100644 index 0000000..85bb08b --- /dev/null +++ b/t5-small/app.py @@ -0,0 +1,28 @@ +import gradio as gr +from transformers import T5Tokenizer, T5ForConditionalGeneration + + +tokenizer = T5Tokenizer.from_pretrained("t5-small") +model = T5ForConditionalGeneration.from_pretrained("t5-small") + +def translation(english, language): + if language == 'German': + input_ids = tokenizer("translate English to German: " + english, 
return_tensors="pt").input_ids + elif language == 'French': + input_ids = tokenizer("translate English to French: " + english, return_tensors="pt").input_ids + else: + input_ids = tokenizer("translate English to Romanian: " + english, return_tensors="pt").input_ids + outputs = model.generate(input_ids) + + return tokenizer.decode(outputs[0], skip_special_tokens=True) + +demo = gr.Interface(fn=translation, + inputs=['text', gr.inputs.Radio(['German','French','Romanian'], type='value', default='German', label='language')], + outputs='text', + title = "ηΏ»θ―‘" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/trocr-base-handwritten/app.py b/trocr-base-handwritten/app.py new file mode 100644 index 0000000..23edddd --- /dev/null +++ b/trocr-base-handwritten/app.py @@ -0,0 +1,26 @@ +from transformers import TrOCRProcessor, VisionEncoderDecoderModel +from PIL import Image +import requests +import gradio as gr + + +processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten') +model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten') + +def ocr(image): + pixel_values = processor(images=image, return_tensors="pt").pixel_values + generated_ids = model.generate(pixel_values) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + +demo = gr.Interface(fn=ocr, + inputs='image', + outputs='text', + title = "ocr", + examples = ['handwritten.jpeg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7010) diff --git a/trocr-base-handwritten/handwritten.jpeg b/trocr-base-handwritten/handwritten.jpeg new file mode 100644 index 0000000..ccc96e5 Binary files /dev/null and b/trocr-base-handwritten/handwritten.jpeg differ diff --git a/trocr-base-printed/app.py b/trocr-base-printed/app.py new file mode 100644 index 0000000..a54c073 --- /dev/null +++ b/trocr-base-printed/app.py @@ -0,0 +1,26 @@ +from transformers import TrOCRProcessor, VisionEncoderDecoderModel +from PIL import Image +import requests +import gradio as gr + + +processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed') +model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed') + +def ocr(image): + pixel_values = processor(images=image, return_tensors="pt").pixel_values + generated_ids = model.generate(pixel_values) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + +demo = gr.Interface(fn=ocr, + inputs='image', + outputs='text', + title = "ocr", + examples = ['printed.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7011) diff --git a/trocr-base-printed/printed.jpg b/trocr-base-printed/printed.jpg new file mode 100644 index 0000000..a2fadd0 Binary files /dev/null and b/trocr-base-printed/printed.jpg differ diff --git a/twitter-roberta-base-sentiment-latest/app.py b/twitter-roberta-base-sentiment-latest/app.py new file mode 100644 index 0000000..6685825 --- /dev/null +++ b/twitter-roberta-base-sentiment-latest/app.py @@ -0,0 +1,22 @@ +import gradio as gr +from transformers import pipeline + + +model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest" +sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path) + +def sentiment_analysis(text): + results = sentiment_task(text) + + return 
results + +demo = gr.Interface(fn=sentiment_analysis, + inputs='text', + outputs='text', + title = "ζ–‡ζœ¬ζƒ…ζ„Ÿεˆ†ζž" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/twitter-xlm-roberta-base-sentiment/app.py b/twitter-xlm-roberta-base-sentiment/app.py new file mode 100644 index 0000000..fc9123e --- /dev/null +++ b/twitter-xlm-roberta-base-sentiment/app.py @@ -0,0 +1,22 @@ +import gradio as gr +from transformers import pipeline + + +model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment" +sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path) + +def sentiment_analysis(text): + results = sentiment_task(text) + + return results + +demo = gr.Interface(fn=sentiment_analysis, + inputs='text', + outputs='text', + title = "ζ–‡ζœ¬ζƒ…ζ„Ÿεˆ†ζž" + ) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7028) diff --git a/vilt-b32-finetuned-vqa/app.py b/vilt-b32-finetuned-vqa/app.py new file mode 100644 index 0000000..5947af3 --- /dev/null +++ b/vilt-b32-finetuned-vqa/app.py @@ -0,0 +1,31 @@ +from transformers import ViltProcessor, ViltForQuestionAnswering +import requests +from PIL import Image +import gradio as gr +import torch + + +processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa") +model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") + + +def vqa(image, question): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + inputs = processor(inp, question, return_tensors="pt") + + outputs = model(**inputs) + logits = outputs.logits + idx = logits.argmax(-1).item() + + return model.config.id2label[idx] + + +demo = gr.Interface(fn=vqa, + inputs=['image', 'text'], + outputs='text', + title = "vqa", + examples = [['soccer.jpg', 'how many people in the picture?']]) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7023) diff --git a/vilt-b32-finetuned-vqa/soccer.jpg b/vilt-b32-finetuned-vqa/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/vilt-b32-finetuned-vqa/soccer.jpg differ diff --git a/vit-age-classifier/app.py b/vit-age-classifier/app.py new file mode 100644 index 0000000..2e0767b --- /dev/null +++ b/vit-age-classifier/app.py @@ -0,0 +1,26 @@ +import gradio as gr +import torch +from transformers import ViTFeatureExtractor, ViTForImageClassification + + +model = ViTForImageClassification.from_pretrained('nateraw/vit-age-classifier') +transforms = ViTFeatureExtractor.from_pretrained('nateraw/vit-age-classifier') + +def image_classification(image): + inputs = transforms(image, return_tensors='pt') + logits = model(**inputs).logits + + predicted_label = logits.argmax(-1).item() + return model.config.id2label[predicted_label] + + +demo = gr.Interface(fn=image_classification, + inputs=gr.Image(), + outputs='text', + title = "εΉ΄ιΎ„εˆ’εˆ†", + examples = ['dog.jpeg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/vit-age-classifier/dog.jpeg b/vit-age-classifier/dog.jpeg new file mode 100644 index 0000000..f83318c Binary files /dev/null and b/vit-age-classifier/dog.jpeg differ diff --git a/vit-gpt2-coco-en/app.py b/vit-gpt2-coco-en/app.py new file mode 100644 index 0000000..30dfbb0 --- /dev/null +++ b/vit-gpt2-coco-en/app.py @@ -0,0 +1,39 @@ +import torch +import requests 
+from PIL import Image +from transformers import ViTFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel +import gradio as gr + + +loc = "ydshieh/vit-gpt2-coco-en" + +feature_extractor = ViTFeatureExtractor.from_pretrained(loc) +tokenizer = AutoTokenizer.from_pretrained(loc) +model = VisionEncoderDecoderModel.from_pretrained(loc) +model.eval() + + +def predict(image): + pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + + with torch.no_grad(): + output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences + + preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True) + + total_caption = "" + for pred in preds: + total_caption = total_caption + pred.strip() + total_caption = total_caption + "\r\n" + + return total_caption + +demo = gr.Interface(fn=predict, + inputs='image', + outputs='text', + title = "image2text", + examples = ['soccer.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7019) diff --git a/vit-gpt2-coco-en/soccer.jpg b/vit-gpt2-coco-en/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/vit-gpt2-coco-en/soccer.jpg differ diff --git a/vit-gpt2-image-captioning/app.py b/vit-gpt2-image-captioning/app.py new file mode 100644 index 0000000..cdd6ad9 --- /dev/null +++ b/vit-gpt2-image-captioning/app.py @@ -0,0 +1,29 @@ +from transformers import pipeline +import gradio as gr +import cv2 +from PIL import Image + + +image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning") + + +def ocr(image): + inp = Image.fromarray(image.astype('uint8'), 'RGB') + text = image_to_text(inp) + + total_caption = "" + for caption in text: + total_caption = total_caption + caption.get('generated_text') + total_caption = total_caption + '\r\n' + + return total_caption + +demo = gr.Interface(fn=ocr, + inputs='image', + outputs='text', + title = "image2text", + examples = ['soccer.jpg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3).launch(server_name = "0.0.0.0", server_port = 7016) diff --git a/vit-gpt2-image-captioning/soccer.jpg b/vit-gpt2-image-captioning/soccer.jpg new file mode 100644 index 0000000..3f2ec82 Binary files /dev/null and b/vit-gpt2-image-captioning/soccer.jpg differ diff --git a/vit-large-patch14-clip-224.openai-ft-in12k-in1k/app.py b/vit-large-patch14-clip-224.openai-ft-in12k-in1k/app.py new file mode 100644 index 0000000..d57a248 --- /dev/null +++ b/vit-large-patch14-clip-224.openai-ft-in12k-in1k/app.py @@ -0,0 +1,24 @@ +import gradio as gr +from transformers import ViTImageProcessor, ViTForImageClassification + +processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224') # ImageNet-1k fine-tuned ViT-L checkpoint that ships a classification head +model = ViTForImageClassification.from_pretrained('google/vit-large-patch16-224') # ViTModel exposes no logits; the in21k checkpoint has no fine-tuned head + +def image_classification(image): + inputs = processor(images=image, return_tensors="pt") + outputs = model(**inputs) + logits = outputs.logits + predicted_label = logits.argmax(-1).item() + + return model.config.id2label[predicted_label] + +demo = gr.Interface(fn=image_classification, + inputs=gr.Image(), + outputs=gr.Label(num_top_classes=1), + title = "ε›Ύεƒεˆ†η±»", + examples = ['dog.jpeg']) + + +if __name__ == "__main__": + demo.queue(concurrency_count=3) + demo.launch(server_name = "0.0.0.0", server_port = 7027) diff --git a/vit-large-patch14-clip-224.openai-ft-in12k-in1k/dog.jpeg b/vit-large-patch14-clip-224.openai-ft-in12k-in1k/dog.jpeg new file mode 100644 index 0000000..f83318c Binary
files /dev/null and b/vit-large-patch14-clip-224.openai-ft-in12k-in1k/dog.jpeg differ