# ChronoEdit-14B image-editing example with Diffusers (gist by @asomoza, created October 31, 2025)
import numpy as np
import torch
from diffusers.utils import load_image
from PIL import Image
from transformers import CLIPVisionModel
from chronoedit_diffusers.pipeline_chronoedit import ChronoEditPipeline
# Resolution presets
RESOLUTION_PRESETS = {
    "480p": 480 * 832,
    "720p": 720 * 1280,
    "1080p": 1080 * 1920,
}
def calculate_dimensions(image, mod_value):
    """
    Calculate output dimensions for the target resolution.

    Args:
        image: PIL Image
        mod_value: Modulo value for dimension alignment

    Returns:
        Tuple of (width, height)
    """
    # Target pixel area; fixed here to the 720p preset (RESOLUTION_PRESETS["720p"])
    target_area = 720 * 1280
    # Keep the input aspect ratio, rounding each side down to a multiple of mod_value
    aspect_ratio = image.height / image.width
    calculated_height = (
        round(np.sqrt(target_area * aspect_ratio)) // mod_value * mod_value
    )
    calculated_width = (
        round(np.sqrt(target_area / aspect_ratio)) // mod_value * mod_value
    )
    return calculated_width, calculated_height
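# Worked example (hypothetical input, not part of the original gist): for a
# 1024x1024 image and mod_value=16, target_area = 921600, sqrt(921600) = 960,
# and 960 is already a multiple of 16, so the function returns (960, 960).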
device = "cuda"
model_path = "./checkpoints/ChronoEdit-14B-Diffusers"
image = load_image(
    "https://huggingface.co/datasets/OzzyGT/diffusers-examples/resolve/main/qwen-image-edit/crab.png"
)
num_frames = 5
prompt = "make the background of the image a tropical island with palm trees and a clear blue sky"
negative_prompt = None
num_inference_steps = 50
guidance_scale = 5
enable_temporal_reasoning = False
num_temporal_reasoning_steps = 50
offload_model = False
seed = 42
image_encoder = CLIPVisionModel.from_pretrained(
    model_path, subfolder="image_encoder", torch_dtype=torch.float32
)
pipe = ChronoEditPipeline.from_pretrained(
    model_path, image_encoder=image_encoder, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
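# Model CPU offload keeps each submodule on the CPU and moves it to the GPU only
# while it is in use, trading some inference speed for a lower peak VRAM footprint.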
# Calculate output dimensions
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
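# Note (assumption, check your checkpoint's config): Wan-style transformers usually
# have a spatial VAE scale factor of 8 and patch_size[1] of 2, giving mod_value = 16.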
width, height = calculate_dimensions(image, mod_value)
image = image.resize((width, height))
generator = torch.Generator(device=device).manual_seed(seed)
output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=height,
    width=width,
    num_frames=num_frames,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    enable_temporal_reasoning=enable_temporal_reasoning,
    num_temporal_reasoning_steps=num_temporal_reasoning_steps,
    generator=generator,
    offload_model=offload_model,
).frames[0]
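# The pipeline returns the edit as a short clip of float frames in [0, 1]; the last
# frame holds the edited image, so convert it to uint8 before saving.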
last_frame = (output[-1] * 255).clip(0, 255).astype("uint8")
Image.fromarray(last_frame).save("normal_output.png")
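# Optional sketch (not in the original gist): save the whole clip instead of only
# the last frame, assuming `output` is an array of float frames in [0, 1] as above.
# from diffusers.utils import export_to_video
# export_to_video([frame for frame in output], "normal_output.mp4", fps=8)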