#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "google-genai>=1.0.0",
#     "pillow>=10.0.0",
# ]
# ///
|
"""
Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API.

Usage:
    uv run generate_image.py --prompt "your image description" --filename "output.png" [--input-image PATH] [--resolution 1K|2K|4K] [--api-key KEY]
"""
|
|
|
import argparse
import base64
import os
import sys
from io import BytesIO
from pathlib import Path
|
|
|
|
|
def get_api_key(provided_key: str | None) -> str | None: |
|
"""Get API key from argument first, then environment.""" |
|
if provided_key: |
|
return provided_key |
|
return os.environ.get("GEMINI_API_KEY") |
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the image generator."""
    parser = argparse.ArgumentParser(
        description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
    )
    parser.add_argument(
        "--prompt", "-p",
        required=True,
        help="Image description/prompt",
    )
    parser.add_argument(
        "--filename", "-f",
        required=True,
        help="Output filename (e.g., sunset-mountains.png)",
    )
    parser.add_argument(
        "--input-image", "-i",
        help="Optional input image path for editing/modification",
    )
    parser.add_argument(
        "--resolution", "-r",
        choices=["1K", "2K", "4K"],
        # No argparse-level default: None means "not given", which lets
        # main() tell an explicit "-r 1K" apart from an omitted option.
        default=None,
        help="Output resolution: 1K (default), 2K, or 4K",
    )
    parser.add_argument(
        "--api-key", "-k",
        help="Gemini API key (overrides GEMINI_API_KEY env var)",
    )
    return parser


def _resolution_for_size(width: int, height: int) -> str:
    """Map an input image's pixel size to the output tier used when editing."""
    longest_side = max(width, height)
    if longest_side >= 3000:
        return "4K"
    if longest_side >= 1500:
        return "2K"
    return "1K"


def _save_as_png(image_data, output_path: Path) -> None:
    """Decode an inline image payload and write it to ``output_path`` as an RGB PNG.

    Args:
        image_data: Payload taken from a response part's ``inline_data.data``.
            Handled as raw image bytes; a ``str`` is assumed to be base64 and
            is decoded first.
        output_path: Destination file. Always written in PNG format,
            whatever its extension.
    """
    # Deferred so that importing this module does not require Pillow.
    from PIL import Image as PILImage

    if isinstance(image_data, str):
        image_data = base64.b64decode(image_data)

    image = PILImage.open(BytesIO(image_data))
    if image.mode == "RGBA":
        # Flatten transparency onto a white background instead of dropping it.
        flattened = PILImage.new("RGB", image.size, (255, 255, 255))
        flattened.paste(image, mask=image.split()[3])
        image = flattened
    elif image.mode != "RGB":
        image = image.convert("RGB")
    image.save(str(output_path), "PNG")


def main() -> None:
    """Command-line entry point: generate or edit an image and save it as PNG.

    Parses the arguments, resolves the API key, optionally loads an input
    image to edit, calls the ``gemini-3-pro-image-preview`` model and writes
    the returned image to ``--filename``.

    When ``--resolution`` is omitted, the output resolution is ``1K`` for
    pure generation and is derived from the input image's size when editing.
    An explicitly requested resolution is always honoured.

    Exits with status 1 when no API key is available, the input image cannot
    be loaded, the request fails, or the response contains no image.
    ``argparse`` exits with status 2 on invalid arguments.
    """
    args = _build_parser().parse_args()

    # Get API key
    api_key = get_api_key(args.api_key)
    if not api_key:
        print("Error: No API key provided.", file=sys.stderr)
        print("Please either:", file=sys.stderr)
        print(" 1. Provide --api-key argument", file=sys.stderr)
        print(" 2. Set GEMINI_API_KEY environment variable", file=sys.stderr)
        sys.exit(1)

    # Import here after checking API key to avoid slow import on error
    from google import genai
    from google.genai import types
    from PIL import Image as PILImage

    client = genai.Client(api_key=api_key)

    # Create the output directory before the request is sent.
    output_path = Path(args.filename)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Load input image if provided
    input_image = None
    output_resolution = args.resolution or "1K"
    if args.input_image:
        try:
            input_image = PILImage.open(args.input_image)
            print(f"Loaded input image: {args.input_image}")

            # Auto-detect only when the user did not choose a resolution.
            if args.resolution is None:
                width, height = input_image.size
                output_resolution = _resolution_for_size(width, height)
                print(f"Auto-detected resolution: {output_resolution} (from input {width}x{height})")
        except Exception as e:
            print(f"Error loading input image: {e}", file=sys.stderr)
            sys.exit(1)

    # Build contents (image first if editing, prompt only if generating)
    if input_image is not None:
        contents = [input_image, args.prompt]
        print(f"Editing image with resolution {output_resolution}...")
    else:
        contents = args.prompt
        print(f"Generating image with resolution {output_resolution}...")

    # Top-level boundary: any SDK, decoding or write failure becomes a
    # one-line error and exit status 1. SystemExit is not an Exception,
    # so the sys.exit(1) below passes through this handler untouched.
    try:
        response = client.models.generate_content(
            model="gemini-3-pro-image-preview",
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"],
                image_config=types.ImageConfig(image_size=output_resolution),
            ),
        )

        image_saved = False
        # A response without parts is treated as "no image" instead of
        # failing on iteration over None.
        for part in response.parts or []:
            if part.text is not None:
                print(f"Model response: {part.text}")
            elif part.inline_data is not None:
                # If several images come back, each overwrites the previous.
                _save_as_png(part.inline_data.data, output_path)
                image_saved = True

        if image_saved:
            full_path = output_path.resolve()
            print(f"\nImage saved: {full_path}")
        else:
            print("Error: No image was generated in the response.", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        print(f"Error generating image: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()