tanukon/InstantID.ipynb

## InstantID.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import os\n",
    "import torch\n",
    "import numpy as np\n",
    "\n",
    "from diffusers.utils import load_image\n",
    "from diffusers.models import ControlNetModel\n",
    "from huggingface_hub import hf_hub_download\n",
    "\n",
    "from insightface.app import FaceAnalysis\n",
    "from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def collect_images(image_dir: str) -> list:\n",
    "    \"\"\"Recursively collect the paths of all JPEG/PNG files under ``image_dir``.\n",
    "\n",
    "    A file is selected by its extension (``.jpeg``, ``.jpg`` or ``.png``,\n",
    "    case-insensitive) instead of by a substring of the whole path, so that\n",
    "    e.g. ``png_notes/readme.txt`` is not mistaken for an image while\n",
    "    ``photo.JPG`` is picked up.\n",
    "\n",
    "    Args:\n",
    "        image_dir: Root directory that is traversed with ``os.walk``.\n",
    "\n",
    "    Returns:\n",
    "        A list of image paths (``root`` joined with the file name). Within each\n",
    "        directory the files appear in sorted order. The list is empty when\n",
    "        nothing matches.\n",
    "    \"\"\"\n",
    "    image_extensions = ('.jpeg', '.jpg', '.png')\n",
    "\n",
    "    image_path_list = []\n",
    "    for root, _, files in os.walk(image_dir):\n",
    "        # sorted so that the processing order is reproducible across runs\n",
    "        for filename in sorted(files):\n",
    "            if os.path.splitext(filename)[1].lower() in image_extensions:\n",
    "                image_path_list.append(os.path.join(root, filename))\n",
    "\n",
    "    return image_path_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Function for downloading necessary model weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_models():\n",
    "    \"\"\"Fetch the InstantID weights from the Hugging Face Hub into ``./checkpoints``.\n",
    "\n",
    "    Three files of the ``InstantX/InstantID`` repository are requested through\n",
    "    ``hf_hub_download``: the ControlNet config, the ControlNet weights and the\n",
    "    IP-Adapter checkpoint.\n",
    "    \"\"\"\n",
    "    required_files = (\n",
    "        \"ControlNetModel/config.json\",\n",
    "        \"ControlNetModel/diffusion_pytorch_model.safetensors\",\n",
    "        \"ip-adapter.bin\",\n",
    "    )\n",
    "    for checkpoint_file in required_files:\n",
    "        hf_hub_download(repo_id=\"InstantX/InstantID\", filename=checkpoint_file, local_dir=\"./checkpoints\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Function for preparing the face embedding and the face keypoint image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_face_embeds(app, image_path: str):\n",
    "    \"\"\"Extract the identity embedding and a keypoint image for the largest face.\n",
    "\n",
    "    Args:\n",
    "        app: Face analyser whose ``get(bgr_image)`` returns one record per detected\n",
    "            face, each with ``'bbox'``, ``'embedding'`` and ``'kps'`` entries.\n",
    "        image_path: Location of the face image; passed to ``load_image``.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(face_emb, face_kps)``: the ``'embedding'`` of the face with the\n",
    "        largest bounding-box area and the output of ``draw_kps`` for its keypoints.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no face is detected. (Before this check the failure\n",
    "            surfaced as an opaque ``IndexError: list index out of range``.)\n",
    "    \"\"\"\n",
    "    # load an image\n",
    "    face_image = load_image(image_path)\n",
    "\n",
    "    # prepare face emb; the image is converted from RGB to BGR for the analyser\n",
    "    detected_faces = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))\n",
    "    if len(detected_faces) == 0:\n",
    "        raise ValueError(f'no face detected in {image_path!r}')\n",
    "\n",
    "    # only use the maximum face (largest bounding-box area)\n",
    "    face_info = sorted(\n",
    "        detected_faces,\n",
    "        key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),\n",
    "    )[-1]\n",
    "    face_emb = face_info['embedding']\n",
    "    face_kps = draw_kps(face_image, face_info['kps'])\n",
    "\n",
    "    return face_emb, face_kps"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Function for calculating mixed face embedding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_mixed_face_feats(emb1, emb2, alpha=0.2):\n",
    "    \"\"\"Linearly interpolate between two face embeddings.\n",
    "\n",
    "    ``alpha`` is the weight given to ``emb2``; ``emb1`` is weighted by ``1 - alpha``.\n",
    "    \"\"\"\n",
    "    weight_first = 1 - alpha\n",
    "    return weight_first * emb1 + alpha * emb2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Function for generating an image "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_image(\n",
    "    face_emb,\n",
    "    face_kps,\n",
    "    pipe,\n",
    "    prompt,\n",
    "    negative_prompt,\n",
    "    latents=None,\n",
    "    controlnet_conditioning_scale=0.8,\n",
    "    ip_adapter_scale=0.8,\n",
    "):\n",
    "    \"\"\"Run the pipeline once and return the first generated image.\n",
    "\n",
    "    Args:\n",
    "        face_emb: Face embedding, passed to the pipeline as ``image_embeds``.\n",
    "        face_kps: Keypoint image, passed to the pipeline as ``image``.\n",
    "        pipe: Callable pipeline whose result exposes an ``images`` sequence.\n",
    "        prompt: Text prompt (first positional argument of ``pipe``).\n",
    "        negative_prompt: Negative text prompt.\n",
    "        latents: Optional initial latents, forwarded as-is (``None`` by default).\n",
    "        controlnet_conditioning_scale: Forwarded to ``pipe``; the default keeps the\n",
    "            previously hard-coded value 0.8.\n",
    "        ip_adapter_scale: Forwarded to ``pipe``; the default keeps the previously\n",
    "            hard-coded value 0.8.\n",
    "\n",
    "    Returns:\n",
    "        The first element of ``pipe(...).images``.\n",
    "    \"\"\"\n",
    "    # generate image\n",
    "    with torch.inference_mode():\n",
    "        image = pipe(\n",
    "            prompt,\n",
    "            negative_prompt=negative_prompt,\n",
    "            image_embeds=face_emb,\n",
    "            image=face_kps,\n",
    "            controlnet_conditioning_scale=controlnet_conditioning_scale,\n",
    "            ip_adapter_scale=ip_adapter_scale,\n",
    "            latents=latents,\n",
    "        ).images[0]\n",
    "\n",
    "    return image"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## For Application 1: Image generation on custom dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_id_preserving_image(image_dir: str, prompt: str, negative_prompt: str, result_dir: str = 'results'):\n",
    "    \"\"\"Generate one identity-preserving image for every image found under ``image_dir``.\n",
    "\n",
    "    Each result is saved as ``{result_dir}/{image name}_{prompt tag}.png``, where the\n",
    "    prompt tag is the part of ``prompt`` before the first ``'|'``. The complete\n",
    "    ``prompt`` string is what gets passed to the pipeline.\n",
    "\n",
    "    Args:\n",
    "        image_dir: Directory that is searched by ``collect_images``.\n",
    "        prompt: Text prompt; only its first ``'|'``-separated part is used in file names.\n",
    "        negative_prompt: Negative text prompt passed to the pipeline.\n",
    "        result_dir: Output directory; created if it does not exist.\n",
    "    \"\"\"\n",
    "    os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "    # tag for the output file names; path separators are replaced so that a prompt\n",
    "    # such as 'day/night' cannot point the output into a non-existent sub-directory\n",
    "    prompt_tag = prompt.split('|')[0]\n",
    "    for separator in filter(None, (os.sep, os.altsep)):\n",
    "        prompt_tag = prompt_tag.replace(separator, '_')\n",
    "\n",
    "    # download models\n",
    "    download_models()\n",
    "\n",
    "    # collect images\n",
    "    image_path_list = collect_images(image_dir=image_dir)\n",
    "    if not image_path_list:\n",
    "        # nothing to process: skip the expensive model loading below\n",
    "        print('no images found in', image_dir)\n",
    "        return\n",
    "\n",
    "    # prepare 'antelopev2' under ./models\n",
    "    app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
    "    app.prepare(ctx_id=0, det_size=(640, 640))\n",
    "\n",
    "    # prepare models under ./checkpoints\n",
    "    face_adapter = './checkpoints/ip-adapter.bin'\n",
    "    controlnet_path = './checkpoints/ControlNetModel'\n",
    "\n",
    "    # load IdentityNet\n",
    "    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
    "\n",
    "    base_model = 'wangqixun/YamerMIX_v8'  # from https://civitai.com/models/84040?modelVersionId=196039\n",
    "    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
    "        base_model,\n",
    "        controlnet=controlnet,\n",
    "        torch_dtype=torch.float16\n",
    "    )\n",
    "    pipe.cuda()\n",
    "\n",
    "    # load adapter\n",
    "    pipe.load_ip_adapter_instantid(face_adapter)\n",
    "    pipe.enable_model_cpu_offload()\n",
    "\n",
    "    for image_path in image_path_list:\n",
    "        print('processing...', image_path)\n",
    "        # splitext keeps dots inside the stem, so 'a.1.jpg' and 'a.2.jpg' do not\n",
    "        # collapse to the same output name\n",
    "        image_name = os.path.splitext(os.path.basename(image_path))[0]\n",
    "        try:\n",
    "            face_emb, face_kps = prepare_face_embeds(app, image_path)\n",
    "        except (IndexError, ValueError) as err:\n",
    "            # e.g. no face was detected: skip this image instead of aborting the batch\n",
    "            print('skipped', image_path, '-', err)\n",
    "            continue\n",
    "        image = generate_image(face_emb, face_kps, pipe, prompt, negative_prompt)\n",
    "        image.save(os.path.join(result_dir, f'{image_name}_{prompt_tag}.png'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## For Application 2: ID interpolation between two images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_id_mixed_image(\n",
    "    image_path1: str,\n",
    "    image_path2: str,\n",
    "    prompt: str,\n",
    "    negative_prompt: str,\n",
    "    generator,\n",
    "    height: int = 800,\n",
    "    width: int = 800,\n",
    "    result_dir: str = 'results',\n",
    "    alphas=(0.2, 0.4, 0.6, 0.8),\n",
    "):\n",
    "    \"\"\"Generate images whose identity is interpolated between two face images.\n",
    "\n",
    "    For every ``alpha`` the two face embeddings are blended with\n",
    "    ``calc_mixed_face_feats`` and an image is generated from the same initial\n",
    "    latents and the keypoints of the first image. Each result is saved as\n",
    "    ``{result_dir}/mixed_{name1}_{name2}_{alpha}.png``.\n",
    "\n",
    "    Args:\n",
    "        image_path1: First face image; its keypoints condition every result.\n",
    "        image_path2: Second face image; only its embedding is used.\n",
    "        prompt: Text prompt passed to the pipeline.\n",
    "        negative_prompt: Negative text prompt passed to the pipeline.\n",
    "        generator: Random generator passed to ``torch.randn`` for the shared latents.\n",
    "        height: Pixel height used for the latent shape (``height // 8``).\n",
    "        width: Pixel width used for the latent shape (``width // 8``).\n",
    "        result_dir: Output directory; created if it does not exist.\n",
    "        alphas: Weights of the second identity, one output image per value.\n",
    "\n",
    "    NOTE(review): ``height``/``width`` only shape the latents and are not forwarded\n",
    "    to the pipeline; presumably they must match the size the pipeline derives\n",
    "    from the input image - confirm.\n",
    "    \"\"\"\n",
    "    os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "    # splitext keeps dots that are part of the file stem\n",
    "    image_name1 = os.path.splitext(os.path.basename(image_path1))[0]\n",
    "    image_name2 = os.path.splitext(os.path.basename(image_path2))[0]\n",
    "\n",
    "    # download models\n",
    "    download_models()\n",
    "\n",
    "    # prepare 'antelopev2' under ./models\n",
    "    app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
    "    app.prepare(ctx_id=0, det_size=(640, 640))\n",
    "\n",
    "    # prepare models under ./checkpoints\n",
    "    face_adapter = './checkpoints/ip-adapter.bin'\n",
    "    controlnet_path = './checkpoints/ControlNetModel'\n",
    "\n",
    "    # load IdentityNet\n",
    "    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
    "\n",
    "    base_model = 'wangqixun/YamerMIX_v8'  # from https://civitai.com/models/84040?modelVersionId=196039\n",
    "    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
    "        base_model,\n",
    "        controlnet=controlnet,\n",
    "        torch_dtype=torch.float16\n",
    "    )\n",
    "    pipe.cuda()\n",
    "\n",
    "    # load adapter\n",
    "    pipe.load_ip_adapter_instantid(face_adapter)\n",
    "    pipe.enable_model_cpu_offload()\n",
    "\n",
    "    # one latent tensor shared by all alphas, so only the identity changes\n",
    "    latents = torch.randn(\n",
    "        (1, pipe.unet.config.in_channels, height // 8, width // 8),\n",
    "        generator=generator,\n",
    "        dtype=controlnet.dtype\n",
    "    )\n",
    "\n",
    "    face_emb1, face_kps1 = prepare_face_embeds(app, image_path1)\n",
    "    # the keypoints of the second image are not needed\n",
    "    face_emb2, _ = prepare_face_embeds(app, image_path2)\n",
    "\n",
    "    for alpha in alphas:\n",
    "        face_emb = calc_mixed_face_feats(face_emb1, face_emb2, alpha=alpha)\n",
    "\n",
    "        image = generate_image(face_emb, face_kps1, pipe, prompt, negative_prompt, latents)\n",
    "        image.save(os.path.join(result_dir, f'mixed_{image_name1}_{image_name2}_{alpha}.png'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# directory that is searched for input images (Application 1)\n",
    "image_dir = './images'\n",
    "\n",
    "# the two face images whose identities are interpolated (Application 2)\n",
    "image_path1 = 'images/kazuha_800.jpg'\n",
    "image_path2 = 'images/sakura_800.jpg'\n",
    "\n",
    "# seeded generator used to draw the latents for ID interpolation\n",
    "seed = 23\n",
    "generator = torch.manual_seed(seed)\n",
    "\n",
    "# prompt: the whole string is passed to the pipeline; the part before the first\n",
    "# '|' is used in the output file names of Application 1\n",
    "prompt = \"shiny|natural\"\n",
    "negative_prompt = \"ugly, deformed, noisy, blurry, low contrast\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:52:19.945732501 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "\u001b[1;31m2024-09-22 06:52:20.519726626 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "\u001b[1;31m2024-09-22 06:52:20.539679589 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:52:20.736158971 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
      "set det-size: (640, 640)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:52:21.510209108 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "/opt/conda/envs/InstantID/lib/python3.10/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n",
      "The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
      "Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
      "Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00,  5.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "processing... ./images/sakura_800.jpg\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/envs/InstantID/lib/python3.10/site-packages/insightface/utils/transform.py:68: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\n",
      "To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\n",
      "  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 4, 100, 100])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [00:38<00:00,  1.28it/s]\n"
     ]
    }
   ],
   "source": [
    "# Application 1: ID-preserving generation for every image under image_dir\n",
    "generate_id_preserving_image(\n",
    "    image_dir=image_dir,\n",
    "    prompt=prompt,\n",
    "    negative_prompt=negative_prompt,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:55:48.372176080 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "\u001b[1;31m2024-09-22 06:55:48.811149938 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "\u001b[1;31m2024-09-22 06:55:48.829186789 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:55:48.991040855 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
      "Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
      "find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
      "set det-size: (640, 640)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1;31m2024-09-22 06:55:49.659596291 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
      "\u001b[m\n",
      "The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
      "Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
      "Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00,  5.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 4, 100, 100])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [00:39<00:00,  1.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 4, 100, 100])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [00:39<00:00,  1.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 4, 100, 100])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [00:39<00:00,  1.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([1, 4, 100, 100])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [00:39<00:00,  1.26it/s]\n"
     ]
    }
   ],
   "source": [
    "# Application 2: ID interpolation between image_path1 and image_path2\n",
    "generate_id_mixed_image(\n",
    "    image_path1=image_path1,\n",
    "    image_path2=image_path2,\n",
    "    prompt=prompt,\n",
    "    negative_prompt=negative_prompt,\n",
    "    generator=generator,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "InstantID",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"import cv2\n",
	"import os\n",
	"import torch\n",
	"import numpy as np\n",
	"\n",
	"from diffusers.utils import load_image\n",
	"from diffusers.models import ControlNetModel\n",
	"from huggingface_hub import hf_hub_download\n",
	"\n",
	"from insightface.app import FaceAnalysis\n",
	"from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"def collect_images(image_dir: str) -> list:\n",
	"    \"\"\"Recursively collect the paths of all JPEG/PNG files under ``image_dir``.\n",
	"\n",
	"    A file is selected by its extension (``.jpeg``, ``.jpg`` or ``.png``,\n",
	"    case-insensitive) instead of by a substring of the whole path, so that\n",
	"    e.g. ``png_notes/readme.txt`` is not mistaken for an image while\n",
	"    ``photo.JPG`` is picked up.\n",
	"\n",
	"    Args:\n",
	"        image_dir: Root directory that is traversed with ``os.walk``.\n",
	"\n",
	"    Returns:\n",
	"        A list of image paths (``root`` joined with the file name). Within each\n",
	"        directory the files appear in sorted order. The list is empty when\n",
	"        nothing matches.\n",
	"    \"\"\"\n",
	"    image_extensions = ('.jpeg', '.jpg', '.png')\n",
	"\n",
	"    image_path_list = []\n",
	"    for root, _, files in os.walk(image_dir):\n",
	"        # sorted so that the processing order is reproducible across runs\n",
	"        for filename in sorted(files):\n",
	"            if os.path.splitext(filename)[1].lower() in image_extensions:\n",
	"                image_path_list.append(os.path.join(root, filename))\n",
	"\n",
	"    return image_path_list"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Function for downloading necessary model weights"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"def download_models():\n",
	"    \"\"\"Fetch the InstantID weights from the Hugging Face Hub into ``./checkpoints``.\n",
	"\n",
	"    Three files of the ``InstantX/InstantID`` repository are requested through\n",
	"    ``hf_hub_download``: the ControlNet config, the ControlNet weights and the\n",
	"    IP-Adapter checkpoint.\n",
	"    \"\"\"\n",
	"    required_files = (\n",
	"        \"ControlNetModel/config.json\",\n",
	"        \"ControlNetModel/diffusion_pytorch_model.safetensors\",\n",
	"        \"ip-adapter.bin\",\n",
	"    )\n",
	"    for checkpoint_file in required_files:\n",
	"        hf_hub_download(repo_id=\"InstantX/InstantID\", filename=checkpoint_file, local_dir=\"./checkpoints\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Function for preparing the face embedding and the face keypoint image"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"def prepare_face_embeds(app, image_path: str):\n",
	"    \"\"\"Extract the identity embedding and a keypoint image for the largest face.\n",
	"\n",
	"    Args:\n",
	"        app: Face analyser whose ``get(bgr_image)`` returns one record per detected\n",
	"            face, each with ``'bbox'``, ``'embedding'`` and ``'kps'`` entries.\n",
	"        image_path: Location of the face image; passed to ``load_image``.\n",
	"\n",
	"    Returns:\n",
	"        Tuple ``(face_emb, face_kps)``: the ``'embedding'`` of the face with the\n",
	"        largest bounding-box area and the output of ``draw_kps`` for its keypoints.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: If no face is detected. (Before this check the failure\n",
	"            surfaced as an opaque ``IndexError: list index out of range``.)\n",
	"    \"\"\"\n",
	"    # load an image\n",
	"    face_image = load_image(image_path)\n",
	"\n",
	"    # prepare face emb; the image is converted from RGB to BGR for the analyser\n",
	"    detected_faces = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))\n",
	"    if len(detected_faces) == 0:\n",
	"        raise ValueError(f'no face detected in {image_path!r}')\n",
	"\n",
	"    # only use the maximum face (largest bounding-box area)\n",
	"    face_info = sorted(\n",
	"        detected_faces,\n",
	"        key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]),\n",
	"    )[-1]\n",
	"    face_emb = face_info['embedding']\n",
	"    face_kps = draw_kps(face_image, face_info['kps'])\n",
	"\n",
	"    return face_emb, face_kps"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Function for calculating mixed face embedding"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"def calc_mixed_face_feats(emb1, emb2, alpha=0.2):\n",
	"    \"\"\"Linearly interpolate between two face embeddings.\n",
	"\n",
	"    ``alpha`` is the weight given to ``emb2``; ``emb1`` is weighted by ``1 - alpha``.\n",
	"    \"\"\"\n",
	"    weight_first = 1 - alpha\n",
	"    return weight_first * emb1 + alpha * emb2"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Function for generating an image "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"def generate_image(\n",
	"    face_emb,\n",
	"    face_kps,\n",
	"    pipe,\n",
	"    prompt,\n",
	"    negative_prompt,\n",
	"    latents=None,\n",
	"    controlnet_conditioning_scale=0.8,\n",
	"    ip_adapter_scale=0.8,\n",
	"):\n",
	"    \"\"\"Run the pipeline once and return the first generated image.\n",
	"\n",
	"    Args:\n",
	"        face_emb: Face embedding, passed to the pipeline as ``image_embeds``.\n",
	"        face_kps: Keypoint image, passed to the pipeline as ``image``.\n",
	"        pipe: Callable pipeline whose result exposes an ``images`` sequence.\n",
	"        prompt: Text prompt (first positional argument of ``pipe``).\n",
	"        negative_prompt: Negative text prompt.\n",
	"        latents: Optional initial latents, forwarded as-is (``None`` by default).\n",
	"        controlnet_conditioning_scale: Forwarded to ``pipe``; the default keeps the\n",
	"            previously hard-coded value 0.8.\n",
	"        ip_adapter_scale: Forwarded to ``pipe``; the default keeps the previously\n",
	"            hard-coded value 0.8.\n",
	"\n",
	"    Returns:\n",
	"        The first element of ``pipe(...).images``.\n",
	"    \"\"\"\n",
	"    # generate image\n",
	"    with torch.inference_mode():\n",
	"        image = pipe(\n",
	"            prompt,\n",
	"            negative_prompt=negative_prompt,\n",
	"            image_embeds=face_emb,\n",
	"            image=face_kps,\n",
	"            controlnet_conditioning_scale=controlnet_conditioning_scale,\n",
	"            ip_adapter_scale=ip_adapter_scale,\n",
	"            latents=latents,\n",
	"        ).images[0]\n",
	"\n",
	"    return image"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## For Application 1: Image generation on custom dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"def generate_id_preserving_image(image_dir: str, prompt: str, negative_prompt: str, result_dir: str = 'results'):\n",
	"    \"\"\"Generate one identity-preserving image for every image found under ``image_dir``.\n",
	"\n",
	"    Each result is saved as ``{result_dir}/{image name}_{prompt tag}.png``, where the\n",
	"    prompt tag is the part of ``prompt`` before the first ``'|'``. The complete\n",
	"    ``prompt`` string is what gets passed to the pipeline.\n",
	"\n",
	"    Args:\n",
	"        image_dir: Directory that is searched by ``collect_images``.\n",
	"        prompt: Text prompt; only its first ``'|'``-separated part is used in file names.\n",
	"        negative_prompt: Negative text prompt passed to the pipeline.\n",
	"        result_dir: Output directory; created if it does not exist.\n",
	"    \"\"\"\n",
	"    os.makedirs(result_dir, exist_ok=True)\n",
	"\n",
	"    # tag for the output file names; path separators are replaced so that a prompt\n",
	"    # such as 'day/night' cannot point the output into a non-existent sub-directory\n",
	"    prompt_tag = prompt.split('|')[0]\n",
	"    for separator in filter(None, (os.sep, os.altsep)):\n",
	"        prompt_tag = prompt_tag.replace(separator, '_')\n",
	"\n",
	"    # download models\n",
	"    download_models()\n",
	"\n",
	"    # collect images\n",
	"    image_path_list = collect_images(image_dir=image_dir)\n",
	"    if not image_path_list:\n",
	"        # nothing to process: skip the expensive model loading below\n",
	"        print('no images found in', image_dir)\n",
	"        return\n",
	"\n",
	"    # prepare 'antelopev2' under ./models\n",
	"    app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
	"    app.prepare(ctx_id=0, det_size=(640, 640))\n",
	"\n",
	"    # prepare models under ./checkpoints\n",
	"    face_adapter = './checkpoints/ip-adapter.bin'\n",
	"    controlnet_path = './checkpoints/ControlNetModel'\n",
	"\n",
	"    # load IdentityNet\n",
	"    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
	"\n",
	"    base_model = 'wangqixun/YamerMIX_v8'  # from https://civitai.com/models/84040?modelVersionId=196039\n",
	"    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
	"        base_model,\n",
	"        controlnet=controlnet,\n",
	"        torch_dtype=torch.float16\n",
	"    )\n",
	"    pipe.cuda()\n",
	"\n",
	"    # load adapter\n",
	"    pipe.load_ip_adapter_instantid(face_adapter)\n",
	"    pipe.enable_model_cpu_offload()\n",
	"\n",
	"    for image_path in image_path_list:\n",
	"        print('processing...', image_path)\n",
	"        # splitext keeps dots inside the stem, so 'a.1.jpg' and 'a.2.jpg' do not\n",
	"        # collapse to the same output name\n",
	"        image_name = os.path.splitext(os.path.basename(image_path))[0]\n",
	"        try:\n",
	"            face_emb, face_kps = prepare_face_embeds(app, image_path)\n",
	"        except (IndexError, ValueError) as err:\n",
	"            # e.g. no face was detected: skip this image instead of aborting the batch\n",
	"            print('skipped', image_path, '-', err)\n",
	"            continue\n",
	"        image = generate_image(face_emb, face_kps, pipe, prompt, negative_prompt)\n",
	"        image.save(os.path.join(result_dir, f'{image_name}_{prompt_tag}.png'))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## For Application 2: ID interpolation between two images"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"def generate_id_mixed_image(\n",
	"    image_path1: str,\n",
	"    image_path2: str,\n",
	"    prompt: str,\n",
	"    negative_prompt: str,\n",
	"    generator,\n",
	"    height: int = 800,\n",
	"    width: int = 800,\n",
	"    result_dir: str = 'results',\n",
	"    alphas=(0.2, 0.4, 0.6, 0.8),\n",
	"):\n",
	"    \"\"\"Generate images whose identity is interpolated between two face images.\n",
	"\n",
	"    For every ``alpha`` the two face embeddings are blended with\n",
	"    ``calc_mixed_face_feats`` and an image is generated from the same initial\n",
	"    latents and the keypoints of the first image. Each result is saved as\n",
	"    ``{result_dir}/mixed_{name1}_{name2}_{alpha}.png``.\n",
	"\n",
	"    Args:\n",
	"        image_path1: First face image; its keypoints condition every result.\n",
	"        image_path2: Second face image; only its embedding is used.\n",
	"        prompt: Text prompt passed to the pipeline.\n",
	"        negative_prompt: Negative text prompt passed to the pipeline.\n",
	"        generator: Random generator passed to ``torch.randn`` for the shared latents.\n",
	"        height: Pixel height used for the latent shape (``height // 8``).\n",
	"        width: Pixel width used for the latent shape (``width // 8``).\n",
	"        result_dir: Output directory; created if it does not exist.\n",
	"        alphas: Weights of the second identity, one output image per value.\n",
	"\n",
	"    NOTE(review): ``height``/``width`` only shape the latents and are not forwarded\n",
	"    to the pipeline; presumably they must match the size the pipeline derives\n",
	"    from the input image - confirm.\n",
	"    \"\"\"\n",
	"    os.makedirs(result_dir, exist_ok=True)\n",
	"\n",
	"    # splitext keeps dots that are part of the file stem\n",
	"    image_name1 = os.path.splitext(os.path.basename(image_path1))[0]\n",
	"    image_name2 = os.path.splitext(os.path.basename(image_path2))[0]\n",
	"\n",
	"    # download models\n",
	"    download_models()\n",
	"\n",
	"    # prepare 'antelopev2' under ./models\n",
	"    app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])\n",
	"    app.prepare(ctx_id=0, det_size=(640, 640))\n",
	"\n",
	"    # prepare models under ./checkpoints\n",
	"    face_adapter = './checkpoints/ip-adapter.bin'\n",
	"    controlnet_path = './checkpoints/ControlNetModel'\n",
	"\n",
	"    # load IdentityNet\n",
	"    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)\n",
	"\n",
	"    base_model = 'wangqixun/YamerMIX_v8'  # from https://civitai.com/models/84040?modelVersionId=196039\n",
	"    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(\n",
	"        base_model,\n",
	"        controlnet=controlnet,\n",
	"        torch_dtype=torch.float16\n",
	"    )\n",
	"    pipe.cuda()\n",
	"\n",
	"    # load adapter\n",
	"    pipe.load_ip_adapter_instantid(face_adapter)\n",
	"    pipe.enable_model_cpu_offload()\n",
	"\n",
	"    # one latent tensor shared by all alphas, so only the identity changes\n",
	"    latents = torch.randn(\n",
	"        (1, pipe.unet.config.in_channels, height // 8, width // 8),\n",
	"        generator=generator,\n",
	"        dtype=controlnet.dtype\n",
	"    )\n",
	"\n",
	"    face_emb1, face_kps1 = prepare_face_embeds(app, image_path1)\n",
	"    # the keypoints of the second image are not needed\n",
	"    face_emb2, _ = prepare_face_embeds(app, image_path2)\n",
	"\n",
	"    for alpha in alphas:\n",
	"        face_emb = calc_mixed_face_feats(face_emb1, face_emb2, alpha=alpha)\n",
	"\n",
	"        image = generate_image(face_emb, face_kps1, pipe, prompt, negative_prompt, latents)\n",
	"        image.save(os.path.join(result_dir, f'mixed_{image_name1}_{image_name2}_{alpha}.png'))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"# directory that is searched for input images (Application 1)\n",
	"image_dir = './images'\n",
	"\n",
	"# the two face images whose identities are interpolated (Application 2)\n",
	"image_path1 = 'images/kazuha_800.jpg'\n",
	"image_path2 = 'images/sakura_800.jpg'\n",
	"\n",
	"# seeded generator used to draw the latents for ID interpolation\n",
	"seed = 23\n",
	"generator = torch.manual_seed(seed)\n",
	"\n",
	"# prompt: the whole string is passed to the pipeline; the part before the first\n",
	"# '|' is used in the output file names of Application 1\n",
	"prompt = \"shiny|natural\"\n",
	"negative_prompt = \"ugly, deformed, noisy, blurry, low contrast\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:52:19.945732501 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"\u001b[1;31m2024-09-22 06:52:20.519726626 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"\u001b[1;31m2024-09-22 06:52:20.539679589 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:52:20.736158971 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
	"set det-size: (640, 640)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:52:21.510209108 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"/opt/conda/envs/InstantID/lib/python3.10/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
	" warnings.warn(\n",
	"The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
	"Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
	"Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00, 5.10it/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"processing... ./images/sakura_800.jpg\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/opt/conda/envs/InstantID/lib/python3.10/site-packages/insightface/utils/transform.py:68: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\n",
	"To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\n",
	" P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"torch.Size([1, 4, 100, 100])\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 50/50 [00:38<00:00, 1.28it/s]\n"
	]
	}
	],
	"source": [
	"# Application 1\n",
	"generate_id_preserving_image(prompt=prompt, negative_prompt=negative_prompt, image_dir=image_dir)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:55:48.372176080 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"\u001b[1;31m2024-09-22 06:55:48.811149938 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"\u001b[1;31m2024-09-22 06:55:48.829186789 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:55:48.991040855 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/glintr100.onnx recognition ['None', 3, 112, 112] 127.5 127.5\n",
	"Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}\n",
	"find model: ./models/antelopev2/scrfd_10g_bnkps.onnx detection [1, 3, '?', '?'] 127.5 128.0\n",
	"set det-size: (640, 640)\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\u001b[1;31m2024-09-22 06:55:49.659596291 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.12: cannot open shared object file: No such file or directory\n",
	"\u001b[m\n",
	"The config attributes {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} were passed to StableDiffusionXLInstantIDPipeline, but are not expected and will be ignored. Please verify your model_index.json configuration file.\n",
	"Keyword arguments {'controlnet_list': ['controlnet', 'RPMultiControlNetModel'], 'requires_aesthetics_score': False} are not expected by StableDiffusionXLInstantIDPipeline and will be ignored.\n",
	"Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00, 5.62it/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"torch.Size([1, 4, 100, 100])\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 50/50 [00:39<00:00, 1.28it/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"torch.Size([1, 4, 100, 100])\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 50/50 [00:39<00:00, 1.27it/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"torch.Size([1, 4, 100, 100])\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 50/50 [00:39<00:00, 1.26it/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"torch.Size([1, 4, 100, 100])\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 50/50 [00:39<00:00, 1.26it/s]\n"
	]
	}
	],
	"source": [
	"# Application 2\n",
	"generate_id_mixed_image(image_path1=image_path1, image_path2=image_path2, prompt=prompt, negative_prompt=negative_prompt, generator=generator)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "InstantID",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.15"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found