asomoza/flux2_8GB_inference.py

## flux2_8GB_inference.py
import io
import os

import requests
import torch

from diffusers import Flux2Pipeline, Flux2Transformer2DModel


# You will need ~6.5GB of free VRAM and ~40GB of free RAM to run this script (RAM usage drops to ~10GB if
# you enable low_cpu_mem_usage=True in the enable_group_offload call below).

# You need a Hugging Face access token: https://huggingface.co/docs/hub/en/security-tokens
# On Linux/macOS, set it with: export HF_TOKEN="token"
# On Windows (cmd), set it with: set HF_TOKEN=token  (no quotes; cmd would store them as part of the value)
# You can also uncomment the following line and set it directly here (not recommended for security reasons).
# os.environ["HF_TOKEN"] = "token"


# Hub repository id that both the transformer and the pipeline are loaded from.
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
# dtype passed as `torch_dtype` to both `from_pretrained` calls in this script.
torch_dtype = torch.bfloat16
# Device used as the group-offload onload target, for `pipe.to(...)`, and for the random generator.
device = "cuda"


def remote_text_encoder(prompts: str | list[str], *, timeout: float | None = 300.0) -> torch.Tensor:
    """Encode prompt(s) with the hosted FLUX.2 text encoder and return the embeddings.

    Each prompt is sent in its own POST request to the remote endpoint and the
    response body is deserialized with ``torch.load``. For a list/tuple of
    prompts the per-prompt results are concatenated along dim 0.

    Args:
        prompts: A single prompt string, or a list/tuple of prompt strings.
        timeout: Seconds to wait for each HTTP request. ``None`` waits
            indefinitely (the behavior before this parameter existed).

    Returns:
        The embeddings tensor, moved to the module-level ``device`` for both
        the single-prompt and the multi-prompt case.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
        RuntimeError: If the endpoint answers with a status code other than 200.
        requests.RequestException: On connection errors or when ``timeout`` expires.
    """
    # Build the headers once (not once per prompt); a missing token fails fast
    # with KeyError before any request is sent.
    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}", "Content-Type": "application/json"}

    def _encode_single(prompt: str) -> torch.Tensor:
        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompt},
            headers=headers,
            timeout=timeout,  # without a timeout a stalled endpoint blocks forever
        )
        # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
        if response.status_code != 200:
            raise RuntimeError(f"remote text encoder request failed: {response.status_code=}")
        # NOTE(security): torch.load unpickles bytes received over the network.
        # weights_only=True restricts unpickling to tensors and plain containers
        # so a tampered response cannot execute arbitrary code.
        return torch.load(io.BytesIO(response.content), weights_only=True)

    if isinstance(prompts, (list, tuple)):
        embeds = torch.cat([_encode_single(p) for p in prompts], dim=0)
    else:
        embeds = _encode_single(prompts)

    # Move the result to the target device in BOTH branches; previously only the
    # single-prompt path did this, so batched prompts could stay off-device.
    return embeds.to(device)


# Load the transformer on its own from the "transformer" subfolder of the repo,
# requesting CPU placement via device_map so it does not go to the GPU up front.
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id, subfolder="transformer", torch_dtype=torch_dtype, device_map="cpu"
)

# Build the pipeline around the preloaded transformer. text_encoder=None means
# no text encoder is passed in here; this script obtains prompt embeddings from
# remote_text_encoder() instead.
pipe = Flux2Pipeline.from_pretrained(
    repo_id,
    text_encoder=None,
    transformer=transformer,
    torch_dtype=torch_dtype,
)
# Enable group offloading on the transformer between `device` (onload) and the
# CPU (offload) at "leaf_level" granularity with streams enabled.
# NOTE(review): presumably this keeps weights on the CPU and moves each leaf
# module to the GPU only while it runs -- confirm against the diffusers docs.
pipe.transformer.enable_group_offload(
    onload_device=device,
    offload_device="cpu",
    offload_type="leaf_level",
    use_stream=True,
    # low_cpu_mem_usage=True,  # uncomment for lower RAM usage
)
# Called after group offloading is configured.
# NOTE(review): presumably this moves the remaining pipeline components to
# `device` while the transformer stays under offload control -- TODO confirm.
pipe.to(device)

# Text prompt for the image. It is encoded by the remote text encoder because
# the pipeline above is created with text_encoder=None.
prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
prompt_embeds = remote_text_encoder(prompt)

# Fixed seed so repeated runs start from the same random state.
seeded_generator = torch.Generator(device=device).manual_seed(42)

# Sampling settings, gathered in one place and forwarded to the pipeline call.
sampling_kwargs = {
    "num_inference_steps": 50,  # 28 is a good trade-off
    "guidance_scale": 4,
    "height": 1024,
    "width": 1024,
}

pipeline_output = pipe(
    prompt_embeds=prompt_embeds,
    generator=seeded_generator,
    **sampling_kwargs,
)

# Only one prompt was encoded, so take the first (and only) generated image.
image = pipeline_output.images[0]

output_path = "flux2_8GB_inference_output.png"
image.save(output_path)
	import io
	import os

	import requests
	import torch

	from diffusers import Flux2Pipeline, Flux2Transformer2DModel


	# you will need to have ~6.5GB of free VRAM and ~40GB of free RAM to run this script (~10 if you enable
	# low_cpu_mem_usage=True)

	# need to have a hf access token https://huggingface.co/docs/hub/en/security-tokens
	# you can set it like this in linux: export HF_TOKEN="token"
	# or like this in windows: set HF_TOKEN="token"
	# also can uncomment the following line and set it directly here (not recommended for security reasons)
	# os.environ["HF_TOKEN"] = "token"


	repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
	torch_dtype = torch.bfloat16
	device = "cuda"


	def remote_text_encoder(prompts: str | list[str]):
	def _encode_single(prompt: str):
	response = requests.post(
	"https://remote-text-encoder-flux-2.huggingface.co/predict",
	json={"prompt": prompt},
	headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}", "Content-Type": "application/json"},
	)
	assert response.status_code == 200, f"{response.status_code=}"
	return torch.load(io.BytesIO(response.content))

	if isinstance(prompts, (list, tuple)):
	embeds = [_encode_single(p) for p in prompts]
	return torch.cat(embeds, dim=0)

	return _encode_single(prompts).to("cuda")


	transformer = Flux2Transformer2DModel.from_pretrained(
	repo_id, subfolder="transformer", torch_dtype=torch_dtype, device_map="cpu"
	)

	pipe = Flux2Pipeline.from_pretrained(
	repo_id,
	text_encoder=None,
	transformer=transformer,
	torch_dtype=torch_dtype,
	)
	pipe.transformer.enable_group_offload(
	onload_device=device,
	offload_device="cpu",
	offload_type="leaf_level",
	use_stream=True,
	# low_cpu_mem_usage=True, # uncomment for lower RAM usage
	)
	pipe.to(device)

	prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
	prompt_embeds = remote_text_encoder(prompt)

	image = pipe(
	prompt_embeds=prompt_embeds,
	generator=torch.Generator(device=device).manual_seed(42),
	num_inference_steps=50, # 28 is a good trade-off
	guidance_scale=4,
	height=1024,
	width=1024,
	).images[0]

	image.save("flux2_8GB_inference_output.png")
No results found