Created
March 30, 2024 12:00
-
-
Save homedirectory/2047fecca2d44a19688c950875ca94f9 to your computer and use it in GitHub Desktop.
Example of multimodal models from Hugging Face
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# Load the processor and model once at import time so that every request
# handled by the UI reuses the same objects instead of reloading them.
processor = AutoProcessor.from_pretrained("microsoft/git-base-textvqa")
# The model download is cached under /tmp (the processor uses the default cache).
model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-textvqa", cache_dir='/tmp')
def doit(image1, image2, question):
    """Answer a question about two images shown side by side.

    The two images are pasted next to each other onto a single RGB canvas,
    the question is prefixed with a short hint about that layout, and the
    module-level ``processor`` / ``model`` pair generates the answer.

    Args:
        image1: Left image; must expose ``width``/``height`` and be accepted
            by ``Image.paste`` (the UI supplies PIL images).
        image2: Right image, same requirements as ``image1``.
        question: The user's question as plain text.

    Returns:
        The decoded generated sequences joined with newlines.
        NOTE(review): the decoded text presumably still contains the prompt
        itself, since the prompt ids are passed to ``generate`` -- confirm.

    Raises:
        gr.Error: If either image is missing, so the UI shows a readable
            message instead of an ``AttributeError`` traceback.
    """
    if image1 is None or image2 is None:
        raise gr.Error("Please provide both images before asking a question.")

    # Concatenate the images side by side. The canvas is as tall as the
    # taller image so that neither one is cropped; the unused area below the
    # shorter image keeps the canvas default fill.
    canvas_size = (image1.width + image2.width, max(image1.height, image2.height))
    image = Image.new('RGB', canvas_size)
    image.paste(image1, (0, 0))
    image.paste(image2, (image1.width, 0))

    # Prepare the question: tell the model it is looking at a two-image collage.
    question = "Given these 2 images side-by-side, " + question

    # Actual work: encode the image and the prompt separately.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    input_ids = processor(text=question, add_special_tokens=False).input_ids
    # Special tokens were disabled above, so the CLS token is prepended by hand.
    input_ids = [processor.tokenizer.cls_token_id] + input_ids
    # Add a leading batch dimension of size 1.
    input_ids = torch.tensor(input_ids).unsqueeze(0)

    generated_ids = model.generate(pixel_values=pixel_values, input_ids=input_ids, max_length=500)
    answer = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return "\n".join(answer)
# Web UI: two image inputs (upload or webcam, delivered to `doit` as PIL
# images) plus a free-text question; the returned string is shown in a Label.
gradio_app = gr.Interface(
    doit,
    inputs=[
        gr.Image(label="Select an image", sources=['upload', 'webcam'], type="pil"),
        gr.Image(label="Select an image", sources=['upload', 'webcam'], type="pil"),
        gr.Textbox(placeholder="Enter a question"),
    ],
    outputs=[gr.Label(label="Answer")],
    title="Question about 2 images",
)
# Start the Gradio server only when this file is run as a script, not on import.
if __name__ == "__main__":
    gradio_app.launch()
Author
homedirectory
commented
Mar 30, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment