Skip to content

Instantly share code, notes, and snippets.

@asomoza
Last active November 26, 2025 07:57
Show Gist options
  • Select an option

  • Save asomoza/301be3cc0dd9511cf555f8b18fa1bacc to your computer and use it in GitHub Desktop.

Select an option

Save asomoza/301be3cc0dd9511cf555f8b18fa1bacc to your computer and use it in GitHub Desktop.
8GB VRAM Flux.2
import io
import os
import requests
import torch
from diffusers import Flux2Pipeline, Flux2Transformer2DModel
# You need ~6.5GB of free VRAM and ~40GB of free RAM to run this script
# (~10GB of RAM if you enable low_cpu_mem_usage=True).
# You also need a Hugging Face access token: https://huggingface.co/docs/hub/en/security-tokens
# On Linux you can set it like this: export HF_TOKEN="token"
# In the Windows command prompt use: set HF_TOKEN=token   (no quotes: cmd.exe keeps them as part of the value)
# You can also uncomment the following line and set it directly here (not recommended for security reasons).
# os.environ["HF_TOKEN"] = "token"
# Device used for on-loading weights, for the RNG and for inference.
device = "cuda"
# dtype passed to every from_pretrained() call in this script.
torch_dtype = torch.bfloat16
# Hugging Face Hub repository that the transformer and the pipeline are loaded
# from (a bitsandbytes 4-bit checkpoint, going by the repository name).
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
def remote_text_encoder(prompts: str | list[str]):
def _encode_single(prompt: str):
response = requests.post(
"https://remote-text-encoder-flux-2.huggingface.co/predict",
json={"prompt": prompt},
headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}", "Content-Type": "application/json"},
)
assert response.status_code == 200, f"{response.status_code=}"
return torch.load(io.BytesIO(response.content))
if isinstance(prompts, (list, tuple)):
embeds = [_encode_single(p) for p in prompts]
return torch.cat(embeds, dim=0)
return _encode_single(prompts).to("cuda")
# Load the transformer on its own first, placed on the CPU via device_map.
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id,
    subfolder="transformer",
    torch_dtype=torch_dtype,
    device_map="cpu",
)

# Build the pipeline around that transformer. text_encoder=None means no text
# encoder is loaded here; prompt embeddings are passed to the pipeline directly.
pipe = Flux2Pipeline.from_pretrained(
    repo_id,
    transformer=transformer,
    text_encoder=None,
    torch_dtype=torch_dtype,
)

# Group offloading for the transformer: weights are offloaded to the CPU and
# on-loaded to `device`, at leaf-module granularity, using a stream.
_group_offload_options = dict(
    onload_device=device,
    offload_device="cpu",
    offload_type="leaf_level",
    use_stream=True,
    # low_cpu_mem_usage=True,  # uncomment for lower RAM usage
)
pipe.transformer.enable_group_offload(**_group_offload_options)

pipe.to(device)
# Text prompt; it is converted to embeddings remotely rather than by a local
# text encoder.
prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that start with #FF5733 at the top and transitions to #33FF57 at the bottom."
prompt_embeds = remote_text_encoder(prompt)

# Fixed seed so repeated runs use the same random generator state.
generator = torch.Generator(device=device).manual_seed(42)

output = pipe(
    prompt_embeds=prompt_embeds,
    generator=generator,
    num_inference_steps=50,  # 28 is a good trade-off
    guidance_scale=4,
    height=1024,
    width=1024,
)

# Keep the first generated image and write it to disk.
image = output.images[0]
image.save("flux2_8GB_inference_output.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment