Skip to content

Instantly share code, notes, and snippets.

@tanukon
Created September 22, 2024 07:09
Show Gist options
  • Select an option

  • Save tanukon/27e5964a83fe58143f0f407a30e56c28 to your computer and use it in GitHub Desktop.

Select an option

Save tanukon/27e5964a83fe58143f0f407a30e56c28 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import os\n",
"import torch\n",
"import numpy as np\n",
"\n",
"from diffusers.utils import load_image\n",
"from diffusers.models import ControlNetModel\n",
"from huggingface_hub import hf_hub_download\n",
"\n",
"from insightface.app import FaceAnalysis\n",
"from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def collect_images(image_dir: str) -> list:\n",
" \n",
" image_path_list = []\n",
" \n",
" for root, _, files in os.walk(image_dir):\n",
" if len(files) > 0:\n",
" \n",
" for filename in files:\n",
" image_path = os.path.join(root, filename)\n",
" \n",
" if 'jpeg' in image_path:\n",
" image_path_list.append(image_path)\n",
" elif 'png' in image_path:\n",
" image_path_list.append(image_path)\n",
" elif 'jpg' in image_path:\n",
" image_path_list.append(image_path)\n",
" \n",
" return image_path_list "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Function for downloading necessary model weights"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def download_models():\n",
" hf_hub_download(repo_id=\"InstantX/InstantID\", filename=\"ControlNetModel/config.json\", local_dir=\"./checkpoints\")\n",
" hf_hub_download(repo_id=\"InstantX/InstantID\", filename=\"ControlNetModel/diffusion_pytorch_model.safetensors\", local_dir=\"./checkpoints\")\n",
" hf_hub_download(repo_id=\"InstantX/InstantID\", filename=\"ip-adapter.bin\", local_dir=\"./checkpoints\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Function for preparing face embedding and a cropped image"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def prepare_face_embeds(app, image_path: str):\n",
" # load an image\n",
" face_image = load_image(image_path)\n",
"\n",
" # prepare face emb\n",
" face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))\n",
" face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face\n",
" face_emb = face_info['embedding']\n",
" face_kps = draw_kps(face_image, face_info['kps'])\n",
" \n",
" return face_emb, face_kps"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Function for calculating mixed face embedding"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def calc_mixed_face_feats(emb1, emb2, alpha=0.2):\n",
"\n",
" new_emb = (1-alpha) * emb1 + alpha * emb2\n",
" \n",
" return new_emb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Function for generating an image "
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def generate_image(face_emb, face_kps, pipe, prompt, negative_prompt, latents=None):\n",
"\n",
" # generate image\n",
" with torch.inference_mode():\n",
" image = pipe(\n",
" prompt,\n",
" negative_prompt=negative_prompt,\n",
" image_embeds=face_emb,\n",
" image=face_kps,\n",
" controlnet_conditioning_scale=0.8,\n",
" ip_adapter_scale=0.8,\n",
" latents=latents,\n",
" ).images[0]\n",
"\n",
" return image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## For Application 1: Image generation on custom dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_id_preserving_image(image_dir: str, prompt: str, negative_prompt: str, result_dir: str = 'results'):\n",
" os.makedirs(result_dir, exist_ok=True)\n",
" prompt_list = prompt.split('|')\n",
" \n",
" # download models\n",
" download_models()\n",
" \n",
" # collect images\n",
" image_path_list = collect_images(image_dir=image_dir)\n",
" \n",
" # prepare 'antelopev2' under ./models\n",
" app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
" app.prepare(ctx_id=0, det_size=(640, 640))\n",
"\n",
" # prepare models under ./checkpoints\n",
" face_adapter = f'./checkpoints/ip-adapter.bin'\n",
" controlnet_path = f'./checkpoints/ControlNetModel'\n",
"\n",
" # load IdentityNet\n",
" controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
"\n",
" base_model = 'wangqixun/YamerMIX_v8' # from https://civitai.com/models/84040?modelVersionId=196039\n",
" pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
" base_model,\n",
" controlnet=controlnet,\n",
" torch_dtype=torch.float16\n",
" )\n",
" pipe.cuda()\n",
"\n",
" # load adapter\n",
" pipe.load_ip_adapter_instantid(face_adapter)\n",
" pipe.enable_model_cpu_offload()\n",
" \n",
" for image_path in image_path_list:\n",
" print('processing...', image_path)\n",
" image_name = os.path.basename(image_path).split('.')[0]\n",
" face_emb, face_kps = prepare_face_embeds(app, image_path)\n",
" image = generate_image(face_emb, face_kps, pipe, prompt, negative_prompt)\n",
" image.save(os.path.join(result_dir, f'{image_name}_{prompt_list[0]}.png'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## For Application 2 : ID interpolation between two images"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def generate_id_mixed_image(\n",
" image_path1: str, \n",
" image_path2: str, \n",
" prompt: str, \n",
" negative_prompt: str, \n",
" generator,\n",
" height: int = 800,\n",
" width: int = 800, \n",
" result_dir: str = 'results'\n",
" ):\n",
" os.makedirs(result_dir, exist_ok=True)\n",
" \n",
" image_name1 = os.path.basename(image_path1).split('.')[0]\n",
" image_name2 = os.path.basename(image_path2).split('.')[0]\n",
" \n",
" alphas = [0.2, 0.4, 0.6, 0.8]\n",
" \n",
" # download models\n",
" download_models()\n",
" \n",
" # prepare 'antelopev2' under ./models\n",
" app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
" app.prepare(ctx_id=0, det_size=(640, 640))\n",
"\n",
" # prepare models under ./checkpoints\n",
" face_adapter = f'./checkpoints/ip-adapter.bin'\n",
" controlnet_path = f'./checkpoints/ControlNetModel'\n",
"\n",
" # load IdentityNet\n",
" controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
"\n",
" base_model = 'wangqixun/YamerMIX_v8' # from https://civitai.com/models/84040?modelVersionId=196039\n",
" pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
" base_model,\n",
" controlnet=controlnet,\n",
" torch_dtype=torch.float16\n",
" )\n",
" pipe.cuda()\n",
"\n",
" # load adapter\n",
" pipe.load_ip_adapter_instantid(face_adapter)\n",
" pipe.enable_model_cpu_offload()\n",
" \n",
" latents = torch.randn(\n",
" (1, pipe.unet.config.in_channels, height // 8, width // 8),\n",
" generator=generator,\n",
" dtype=controlnet.dtype\n",
" )\n",
" \n",
" face_emb1, face_kps1 = prepare_face_embeds(app, image_path1)\n",
" face_emb2, face_kps2 = prepare_face_embeds(app, image_path2)\n",
" \n",
" for alpha in alphas:\n",
" face_emb = calc_mixed_face_feats(face_emb1, face_emb2, alpha=alpha)\n",
" \n",
" image = generate_image(face_emb, face_kps1, pipe, prompt, negative_prompt, latents)\n",
" image.save(os.path.join(result_dir, f'mixed_{image_name1}_{image_name2}_{alpha}.png'))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# the directory path containing your images\n",
"image_dir = './images'\n",
"\n",
"# the image path that you want to interpolate each other\n",
"image_path1 = 'images/kazuha_800.jpg'\n",
"image_path2 = 'images/sakura_800.jpg'\n",
"\n",
"# latents for ID interpolation\n",
"seed = 23\n",
"generator = torch.manual_seed(seed)\n",
"\n",
"# prompt\n",
"prompt = \"shiny|natural\"\n",
"negative_prompt = \"ugly, deformed, noisy, blurry, low contrast\""
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:52:19.945732501 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"\u001b[1;31m2024-09-22 06:52:20.519726626 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"\u001b[1;31m2024-09-22 06:52:20.539679589 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:52:20.736158971 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
"set det-size: (640, 640)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:52:21.510209108 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"/opt/conda/envs/InstantID/lib/python3.10/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n",
"The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
"Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
"Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00, 5.10it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"processing... ./images/sakura_800.jpg\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/envs/InstantID/lib/python3.10/site-packages/insightface/utils/transform.py:68: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\n",
"To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\n",
" P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 4, 100, 100])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:38<00:00, 1.28it/s]\n"
]
}
],
"source": [
"# Application 1\n",
"generate_id_preserving_image(prompt=prompt, negative_prompt=negative_prompt, image_dir=image_dir)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:55:48.372176080 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"\u001b[1;31m2024-09-22 06:55:48.811149938 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"\u001b[1;31m2024-09-22 06:55:48.829186789 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:55:48.991040855 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
"find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
"set det-size: (640, 640)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1;31m2024-09-22 06:55:49.659596291 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
"\u001b[m\n",
"The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
"Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
"Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00, 5.62it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 4, 100, 100])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:39<00:00, 1.28it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 4, 100, 100])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:39<00:00, 1.27it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 4, 100, 100])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:39<00:00, 1.26it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 4, 100, 100])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:39<00:00, 1.26it/s]\n"
]
}
],
"source": [
"# Application 2\n",
"generate_id_mixed_image(image_path1=image_path1, image_path2=image_path2, prompt=prompt, negative_prompt=negative_prompt, generator=generator)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "InstantID",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment