pszemraj · June 29, 2025 21:29
diff --git a/test_gemma3n.py b/test_gemma3n.py
 # -*- coding: utf-8 -*-
 """gemma-3n-test

 Minimal check of the ``image-text-to-text`` pipeline with
 ``google/gemma-3n-e2b-it``: load the model on CUDA, ask one question about
 a remote image, and print the assistant's reply.

 Install the development builds first:

     pip install -U -q git+https://github.com/huggingface/transformers.git
     pip install -U -q git+https://github.com/huggingface/pytorch-image-models.git
 """

 from transformers import pipeline
 import torch

 # Optional tweak aimed at Ampere-or-newer GPUs; the line can be removed.
 torch.set_float32_matmul_precision("high")

 # Optional quantization (needs `pip install bitsandbytes`): add one of
 #   load_in_8bit=True
 #   load_in_4bit=True
 # to the keyword arguments below.
 pipe = pipeline(
     "image-text-to-text",
     model="google/gemma-3n-e2b-it",
     device="cuda",
     torch_dtype="auto",
 )
 print("loaded model")

 # Chat-style input: one system turn, then a user turn made of an image part
 # followed by the text question about that image.
 system_turn = {
     "role": "system",
     "content": [{"type": "text", "text": "You are a helpful assistant."}],
 }
 image_part = {
     "type": "image",
     "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG",
 }
 question_part = {"type": "text", "text": "What animal is on the candy?"}
 messages = [
     system_turn,
     {"role": "user", "content": [image_part, question_part]},
 ]

 output = pipe(text=messages, max_new_tokens=200)

 # Take the last entry of the first result's "generated_text" list and print
 # its "content" field.
 last_turn = output[0]["generated_text"][-1]
 print(last_turn["content"])
 # Example reply:
 #   Okay, let's take a look!
 #   Based on the image, the animal on the candy is a **turtle**.
 #   You can see the shell shape and the head and legs.
	# -*- coding: utf-8 -*-
	"""gemma-3n-test

	Run one image question through the ``image-text-to-text`` pipeline using
	``google/gemma-3n-e2b-it`` on CUDA and print the reply text.

	Setup:

	    pip install -U -q git+https://github.com/huggingface/transformers.git
	    pip install -U -q git+https://github.com/huggingface/pytorch-image-models.git
	"""

	from transformers import pipeline
	import torch

	# Removable; meant for Ampere-or-newer GPUs.
	torch.set_float32_matmul_precision("high")

	# To try quantized loading, `pip install bitsandbytes` and pass either
	# load_in_8bit=True or load_in_4bit=True as an extra keyword argument.
	pipe = pipeline(
	    "image-text-to-text",
	    model="google/gemma-3n-e2b-it",
	    device="cuda",
	    torch_dtype="auto",
	)
	print("loaded model")

	# Conversation passed to the pipeline: a system turn and a user turn whose
	# content pairs an image URL with a text question.
	messages = [
	    dict(
	        role="system",
	        content=[dict(type="text", text="You are a helpful assistant.")],
	    ),
	    dict(
	        role="user",
	        content=[
	            dict(
	                type="image",
	                url="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG",
	            ),
	            dict(type="text", text="What animal is on the candy?"),
	        ],
	    ),
	]

	output = pipe(text=messages, max_new_tokens=200)

	# Print the "content" of the final entry in the first result's
	# "generated_text" list.
	first_result = output[0]
	print(first_result["generated_text"][-1]["content"])
	# Example reply:
	#   Okay, let's take a look!
	#   Based on the image, the animal on the candy is a **turtle**.
	#   You can see the shell shape and the head and legs.