asomoza/chrono_edit_normal.py

## chrono_edit_normal.py
import numpy as np
import torch
from diffusers.utils import load_image
from PIL import Image
from transformers import CLIPVisionModel

from chronoedit_diffusers.pipeline_chronoedit import ChronoEditPipeline

# Resolution presets
RESOLUTION_PRESETS = {
    "480p": 480 * 832,
    "720p": 720 * 1280,
    "1080p": 1080 * 1920,
}


def calculate_dimensions(image, mod_value):
    """
    Calculate output dimensions based on resolution settings.

    Args:
        image: PIL Image
        mod_value: Modulo value for dimension alignment

    Returns:
        Tuple of (width, height)
    """

    # Get max area from preset or override
    target_area = 720 * 1280

    # Calculate dimensions maintaining aspect ratio
    aspect_ratio = image.height / image.width
    calculated_height = (
        round(np.sqrt(target_area * aspect_ratio)) // mod_value * mod_value
    )
    calculated_width = (
        round(np.sqrt(target_area / aspect_ratio)) // mod_value * mod_value
    )

    return calculated_width, calculated_height


device = "cuda"
model_path = "./checkpoints/ChronoEdit-14B-Diffusers"
image = load_image(
    "https://huggingface.co/datasets/OzzyGT/diffusers-examples/resolve/main/qwen-image-edit/crab.png"
)
num_frames = 5
prompt = "make the background of the image a tropical island with palm trees and a clear blue sky"
negative_prompt = None
num_inference_steps = 50
guidance_scale = 5
enable_temporal_reasoning = False
num_temporal_reasoning_steps = 50
offload_model = False
seed = 42

image_encoder = CLIPVisionModel.from_pretrained(
    model_path, subfolder="image_encoder", torch_dtype=torch.float32
)

pipe = ChronoEditPipeline.from_pretrained(
    model_path, image_encoder=image_encoder, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()

# Calculate output dimensions
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
width, height = calculate_dimensions(image, mod_value)
image = image.resize((width, height))
generator = torch.Generator(device=device).manual_seed(seed)

output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=height,
    width=width,
    num_frames=num_frames,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    enable_temporal_reasoning=enable_temporal_reasoning,
    num_temporal_reasoning_steps=num_temporal_reasoning_steps,
    generator=generator,
    offload_model=offload_model,
).frames[0]

last_frame = (output[-1] * 255).clip(0, 255).astype("uint8")
Image.fromarray(last_frame).save("normal_output.png")
	import numpy as np
	import torch
	from diffusers.utils import load_image
	from PIL import Image
	from transformers import CLIPVisionModel

	from chronoedit_diffusers.pipeline_chronoedit import ChronoEditPipeline

	# Resolution presets
	RESOLUTION_PRESETS = {
	"480p": 480 * 832,
	"720p": 720 * 1280,
	"1080p": 1080 * 1920,
	}


	def calculate_dimensions(image, mod_value):
	"""
	Calculate output dimensions based on resolution settings.

	Args:
	image: PIL Image
	mod_value: Modulo value for dimension alignment

	Returns:
	Tuple of (width, height)
	"""

	# Get max area from preset or override
	target_area = 720 * 1280

	# Calculate dimensions maintaining aspect ratio
	aspect_ratio = image.height / image.width
	calculated_height = (
	round(np.sqrt(target_area * aspect_ratio)) // mod_value * mod_value
	)
	calculated_width = (
	round(np.sqrt(target_area / aspect_ratio)) // mod_value * mod_value
	)

	return calculated_width, calculated_height


	device = "cuda"
	model_path = "./checkpoints/ChronoEdit-14B-Diffusers"
	image = load_image(
	"https://huggingface.co/datasets/OzzyGT/diffusers-examples/resolve/main/qwen-image-edit/crab.png"
	)
	num_frames = 5
	prompt = "make the background of the image a tropical island with palm trees and a clear blue sky"
	negative_prompt = None
	num_inference_steps = 50
	guidance_scale = 5
	enable_temporal_reasoning = False
	num_temporal_reasoning_steps = 50
	offload_model = False
	seed = 42

	image_encoder = CLIPVisionModel.from_pretrained(
	model_path, subfolder="image_encoder", torch_dtype=torch.float32
	)

	pipe = ChronoEditPipeline.from_pretrained(
	model_path, image_encoder=image_encoder, torch_dtype=torch.bfloat16
	)
	pipe.enable_model_cpu_offload()

	# Calculate output dimensions
	mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
	width, height = calculate_dimensions(image, mod_value)
	image = image.resize((width, height))
	generator = torch.Generator(device=device).manual_seed(seed)

	output = pipe(
	image=image,
	prompt=prompt,
	negative_prompt=negative_prompt,
	height=height,
	width=width,
	num_frames=num_frames,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	enable_temporal_reasoning=enable_temporal_reasoning,
	num_temporal_reasoning_steps=num_temporal_reasoning_steps,
	generator=generator,
	offload_model=offload_model,
	).frames[0]

	last_frame = (output[-1] * 255).clip(0, 255).astype("uint8")
	Image.fromarray(last_frame).save("normal_output.png")
No results found