Experimenting with non-instruction-following T5-Gemma models for summarisation (inference)
"""
In early Nov 2025, I tried the non-instruction tuned T5-Gemma models on Colab.
Why? Modern encoder-decoder models could address prompt injection risks which right now are very high and risky.
This logs some of my successes and HF social signals. I didn't get around to finetuning it yet.
--@lukestanley
"""
# !pip install -q -U transformers bitsandbytes accelerate
from huggingface_hub import login
# login(new_session=False)  # interactive login alternative
login(token=READ_ONLY_HF_TOKEN)  # READ_ONLY_HF_TOKEN is a placeholder for a read-only HF token
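# A sketch of supplying the token without hard-coding it (assumes an HF_TOKEN
# environment variable, e.g. set via Colab secrets; not part of the original run):
# import os
# login(token=os.environ["HF_TOKEN"])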
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
ARTICLE = """Jupiter is the fifth planet from the Sun and the largest in the Solar System. It is a gas giant with a mass more than two and a half times that of all the other planets in the Solar System combined, but slightly less than one-thousandth the mass of the Sun. Jupiter is the third brightest natural object in the Earth's night sky after the Moon and Venus. It has been known to ancient astronomers since before recorded history. It is named after the Roman god Jupiter. When viewed from Earth, Jupiter can be bright enough for its reflected light to cast visible shadows, and is on average the third-brightest natural object in the night sky after the Moon and Venus."""
# Each ❤️ heart is a HF like (recorded in early Nov 2025)
# The models below were loaded in bfloat16, up to t5gemma-9b-2b-prefixlm, which needed 8-bit quantisation.
# 0.3B Parameters: 🪱 (the smallest size)
# model_name = "google/t5gemma-s-s-prefixlm" # ❤️❤️ FAILED, using 0.7 GB GPU RAM. The simplest model.
# 0.6B Parameters: 🪱🪱
# model_name = "google/t5gemma-b-b-prefixlm" # ❤️❤️❤️❤️❤️❤️❤️❤️❤️❤️ Half worked (hallucinated), using 1.2 GB GPU RAM.
# Output: "Jupiter is the fifth planet from Earth and largest in our solar system. It has a mass of 2,045 times that on average for all planets combined but slightly less than one-thousandth its own weight (1/367)."
# Note: The ratio is hallucinated, not well grounded.
# 1B Parameters: 🧠
# model_name = "google/t5gemma-l-l-prefixlm" # ❤️❤️❤️ Worked, using 2.6 GB GPU RAM.
# Output: "Jupiter is the fifth planet from Sun and largest in solar system."
# Didn't hallucinate! Probably gives better sentence structure than smaller models.
# 2B Parameters: 🧠🧠
# model_name = "google/t5gemma-ml-ml-prefixlm" # Worked, using 4.5 GB GPU RAM.
# "Jupiter is the largest planet in our solar system."
# 4B Parameters: 🧠🧠🧠 🧠
# model_name = "google/t5gemma-xl-xl-prefixlm" # ❤️ Worked, using 7.8 GB GPU RAM.
# Output: "Jupiter is the fifth planet from our sun and largest in size."
# I'd expect a big jump in nuance for the increased size.
# 6B Parameters: 🧠🧠🧠 🧠🧠🧠
# model_name = "google/t5gemma-2b-2b-prefixlm" # ❤️❤️❤️❤️ Worked, using 11.1 GB GPU RAM.
# Output: "Jupiter is the fifth planet from the Sun and largest in the solar system."
# I expect it to be powerful and coherent.
# 12B Parameters: 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠
# model_name = "google/t5gemma-9b-2b-prefixlm" # ❤️ Asymmetric model
# load_in_8bit=True,
# llm_int8_enable_fp32_cpu_offload=True,
# Output: "Jupiter is the largest planet in the solar system and third brightest natural object after the Moon and Venus."
# 20B Parameters: 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠 🧠🧠🧠 🧠🧠
model_name = "google/t5gemma-9b-9b-prefixlm" # ❤️❤️ Not tested yet.
# I expect it to be the most powerful model.
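# For convenience, a lookup of the variants tried above (a sketch; the keys are
# total parameter counts, the values are the official HF repo IDs listed above):
T5GEMMA_PREFIXLM = {
    "0.3B": "google/t5gemma-s-s-prefixlm",
    "0.6B": "google/t5gemma-b-b-prefixlm",
    "1B": "google/t5gemma-l-l-prefixlm",
    "2B": "google/t5gemma-ml-ml-prefixlm",
    "4B": "google/t5gemma-xl-xl-prefixlm",
    "6B": "google/t5gemma-2b-2b-prefixlm",
    "12B": "google/t5gemma-9b-2b-prefixlm",
    "20B": "google/t5gemma-9b-9b-prefixlm",
}
# model_name = T5GEMMA_PREFIXLM["1B"]  # e.g. switch to the 1B model this way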
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Loading model...")
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,  # 8-bit quantisation; needed here for the 12B+ models
    llm_int8_enable_fp32_cpu_offload=True,  # offload overflow layers to CPU in fp32
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    # dtype=torch.bfloat16,  # used instead of 8-bit for the smaller models above
    device_map="auto",
)
print("Model loaded successfully.")
PROMPT = f"""Article:
```
Coffee is a brewed drink prepared from roasted coffee beans, the seeds of berries from certain flowering plants in the Coffea genus. From the coffee fruit, the seeds are separated to produce a stable, raw product: unroasted green coffee. The seeds are then roasted, a process which transforms them into a consumable product: roasted coffee, which is ground into fine particles that are typically steeped in hot water before being filtered out, producing a cup of coffee.
```
One sentence summary of article:
```
Coffee is a beverage made by brewing roasted and ground beans from the Coffea plant.
```
Article:
```
The Moon is Earth's only natural satellite. It is the fifth largest satellite in the Solar System, and the largest and most massive relative to its parent planet. It is a planetary-mass object with a differentiated rocky body, making it a satellite planet under the geophysical definitions of the term.
```
One sentence summary of article:
```
The Moon is Earth's only natural satellite and the largest in the solar system relative to its host planet.
```
Article:
```
{ARTICLE}
```
One sentence summary of article:
```
"""
inputs = tokenizer(PROMPT, return_tensors="pt").to(model.device)  # model.device respects device_map="auto"
newline_token_id = tokenizer.encode("\n", add_special_tokens=False)[0]  # skip the BOS token encode() would prepend
# Generate the summary, stopping at the first newline (the end of the fenced summary)
outputs = model.generate(**inputs, max_length=90, repetition_penalty=1.5, do_sample=False, eos_token_id=newline_token_id)
# Decode the output
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Print the result
print("\n--- Original Article ---")
print(ARTICLE)
print("\n--- Generated Summary ---")
print(summary)
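# The decoded text can occasionally carry stray backtick fences from the prompt
# format; a cautious clean-up step (an assumption about the output shape):
clean_summary = summary.strip().strip("`").strip()
print("\n--- Cleaned Summary ---")
print(clean_summary)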