Last active
November 26, 2025 07:57
-
-
Save asomoza/301be3cc0dd9511cf555f8b18fa1bacc to your computer and use it in GitHub Desktop.
8GB VRAM Flux.2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import io | |
| import os | |
| import requests | |
| import torch | |
| from diffusers import Flux2Pipeline, Flux2Transformer2DModel | |
| # you will need to have ~6.5GB of free VRAM and ~40GB of free RAM to run this script (~10 if you enable | |
| # low_cpu_mem_usage=True) | |
| # need to have a hf access token https://huggingface.co/docs/hub/en/security-tokens | |
| # you can set it like this in linux: export HF_TOKEN="token" | |
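| # or like this in windows (cmd.exe): set HF_TOKEN=token (no quotes, cmd would keep them as part of the value); in PowerShell: $env:HF_TOKEN="token" | |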
| # also can uncomment the following line and set it directly here (not recommended for security reasons) | |
| # os.environ["HF_TOKEN"] = "token" | |
# Runtime configuration shared by the rest of the script.
device = "cuda"  # used for group-offload onload, pipe.to() and the RNG generator
torch_dtype = torch.bfloat16  # dtype handed to both from_pretrained() calls
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"  # Hub repo both from_pretrained() calls load from
def remote_text_encoder(prompts: str | list[str], timeout: float = 120.0):
    """Encode one or more prompts with the hosted FLUX.2 text encoder.

    Each prompt is POSTed to the remote ``/predict`` endpoint, the response
    body is deserialized with ``torch.load`` and the result is moved to the
    module-level ``device``.

    Args:
        prompts: A single prompt string, or a list/tuple of prompt strings.
            For a sequence, the per-prompt results are concatenated along
            dim 0.
        timeout: Seconds to wait on each HTTP request before ``requests``
            gives up, so a stalled connection cannot hang the script.

    Returns:
        The loaded embeddings, placed on ``device`` (presumably a tensor with
        one leading entry per prompt -- TODO confirm against the endpoint).

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
        ValueError: If ``prompts`` is an empty list/tuple.
        RuntimeError: If the endpoint answers with a status other than 200.
    """
    # Build the headers once, before any network traffic, so a missing
    # HF_TOKEN fails immediately instead of midway through a batch.
    headers = {
        "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
        "Content-Type": "application/json",
    }

    def _encode_single(prompt: str):
        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompt},
            headers=headers,
            timeout=timeout,
        )
        # Explicit raise rather than `assert`: asserts vanish under `python -O`.
        if response.status_code != 200:
            raise RuntimeError(f"{response.status_code=}")
        # NOTE(review): torch.load unpickles data received over the network.
        # Only keep this if the endpoint is trusted; consider passing
        # weights_only=True once confirmed the payload is a plain tensor.
        return torch.load(io.BytesIO(response.content))

    if isinstance(prompts, (list, tuple)):
        if not prompts:
            raise ValueError("prompts must contain at least one prompt")
        embeds = torch.cat([_encode_single(p) for p in prompts], dim=0)
    else:
        embeds = _encode_single(prompts)

    # Both branches end up on the same device; previously only the
    # single-prompt path was moved to the GPU.
    return embeds.to(device)
# Load the transformer on its own and keep it on the CPU for now.
transformer = Flux2Transformer2DModel.from_pretrained(
    repo_id,
    subfolder="transformer",
    torch_dtype=torch_dtype,
    device_map="cpu",
)

# Assemble the pipeline around that transformer. No local text encoder is
# loaded: prompt embeddings are fetched via remote_text_encoder() below.
pipe = Flux2Pipeline.from_pretrained(
    repo_id, text_encoder=None, transformer=transformer, torch_dtype=torch_dtype
)

# Group offloading must be configured before the pipeline is moved to the
# target device, so the order of the next two statements matters.
offload_options = {
    "onload_device": device,
    "offload_device": "cpu",
    "offload_type": "leaf_level",
    "use_stream": True,
    # "low_cpu_mem_usage": True,  # uncomment for lower RAM usage
}
pipe.transformer.enable_group_offload(**offload_options)
pipe.to(device)

prompt = (
    "Realistic macro photograph of a hermit crab using a soda can as its shell, "
    "partially emerging from the can, captured with sharp detail and natural colors, "
    "on a sunlit beach with soft shadows and a shallow depth of field, "
    "with blurred ocean waves in the background. "
    "The can has the text `BFL Diffusers` on it and it has a color gradient that "
    "start with #FF5733 at the top and transitions to #33FF57 at the bottom."
)
prompt_embeds = remote_text_encoder(prompt)

# Fixed seed so repeated runs start from the same generator state.
seeded_generator = torch.Generator(device=device).manual_seed(42)
result = pipe(
    prompt_embeds=prompt_embeds,
    generator=seeded_generator,
    num_inference_steps=50,  # 28 is a good trade-off
    guidance_scale=4,
    height=1024,
    width=1024,
)

# Keep the first generated image and write it to disk.
image = result.images[0]
image.save("flux2_8GB_inference_output.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment