3-0-evaluation.ipynb
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/caleb-kaiser/2cbcce465ffa5e856af07151c202a51c/3-0-evaluation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "<img src=\"https://raw.githubusercontent.com/comet-ml/opik/main/apps/opik-documentation/documentation/static/img/opik-logo.svg\" width=\"250\"/>" | |
| ], | |
| "metadata": { | |
| "id": "tO9p7St93Pa0" | |
| } | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "vqfKhJcs92nt" | |
| }, | |
| "source": [ | |
| "# Evaluation with Opik\n", | |
| "\n", | |
| "In this exercise, you'll implement a basic evaluation pipeline with Opik. You can use OpenAI or open source models via LiteLLM" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Imports & Configuration" | |
| ], | |
| "metadata": { | |
| "id": "UhlBep2DdIKx" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "! pip install opik openai comet_ml litellm --quiet" | |
| ], | |
| "metadata": { | |
| "id": "mDiX7cUYdLiQ" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "5xcX30we92nx" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import opik\n", | |
| "from opik import Opik, track\n", | |
| "from opik.evaluation import evaluate\n", | |
| "from opik.evaluation.metrics import (IsJson)\n", | |
| "from opik.integrations.openai import track_openai\n", | |
| "import openai\n", | |
| "import os\n", | |
| "from datetime import datetime\n", | |
| "from getpass import getpass\n", | |
| "import litellm\n", | |
| "\n", | |
| "# Define project name to enable tracing\n", | |
| "os.environ[\"OPIK_PROJECT_NAME\"] = \"food_chatbot_eval\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Opik configuration\n", | |
| "if \"OPIK_API_KEY\" not in os.environ:\n", | |
| " os.environ[\"OPIK_API_KEY\"] = getpass(\"Enter your Opik API key: \")\n", | |
| "\n", | |
| "opik.configure()" | |
| ], | |
| "metadata": { | |
| "id": "NcJ25mYXdb58" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# OpenAI configuration (ignore if you're using LiteLLM)\n", | |
| "if \"OPENAI_API_KEY\" not in os.environ:\n", | |
| " os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API key: \")" | |
| ], | |
| "metadata": { | |
| "id": "b7nLcmo70kgD" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "MODEL = \"gpt-4o-mini\"" | |
| ], | |
| "metadata": { | |
| "id": "Qrp4OjVKdTsI" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "client = opik.Opik()" | |
| ], | |
| "metadata": { | |
| "id": "IT1T_ilrel9J" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Dataset" | |
| ], | |
| "metadata": { | |
| "id": "aaUhMjZhlknD" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Create or get the dataset\n", | |
| "dataset = client.get_or_create_dataset(name=\"foodchatbot_eval\")" | |
| ], | |
| "metadata": { | |
| "id": "NjD0upvAlmRN" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
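| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Each item in this dataset is a dictionary with `index`, `question`, and `response` keys (the same columns as the CSV used in the optional download section below). The next cell is just a sketch of that schema with a made-up item; it inserts nothing unless you uncomment the last line." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Illustrative only: the item schema this notebook uses.\n", | |
| "# The optional download section below populates the dataset from a CSV with these same fields.\n", | |
| "example_item = {\n", | |
| "    \"index\": \"0\",\n", | |
| "    \"question\": \"Which desserts are vegan?\",\n", | |
| "    \"response\": \"The Fresh Berry Parfait is vegan.\"\n", | |
| "}\n", | |
| "\n", | |
| "# dataset.insert([example_item])  # uncomment to add your own hand-written items" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |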
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "## Optional: Download Dataset From Comet" | |
| ], | |
| "metadata": { | |
| "id": "V0ymGQlYmzdT" | |
| } | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "If you have not previously created the `foodchatbot_eval` dataset in your Opik workspace, run the following code to download the dataset as a Comet Artifact and populate your Opik dataset.\n", | |
| "\n", | |
| "If you have already created the `foodchatbot_eval` dataset, you can skip to the next section" | |
| ], | |
| "metadata": { | |
| "id": "FskOHALFmdTA" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import comet_ml" | |
| ], | |
| "metadata": { | |
| "id": "0D9BaSz6lmOu" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "experiment = comet_ml.start(project_name=\"foodchatbot_eval\")\n", | |
| "\n", | |
| "logged_artifact = experiment.get_artifact(artifact_name=\"foodchatbot_eval\",\n", | |
| " workspace=\"examples\")\n", | |
| "local_artifact = logged_artifact.download(\"./\")\n", | |
| "experiment.end()" | |
| ], | |
| "metadata": { | |
| "id": "UMzbSBCPlmIR" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import csv\n", | |
| "import json\n", | |
| "# Read the CSV file and insert items into the dataset\n", | |
| "with open('./foodchatbot_clean_eval_dataset.csv', newline='') as csvfile:\n", | |
| " reader = csv.reader(csvfile)\n", | |
| " for row in reader:\n", | |
| " index, question, response = row\n", | |
| " item = {\n", | |
| " \"index\": index,\n", | |
| " \"question\": question,\n", | |
| " \"response\": response\n", | |
| " }\n", | |
| "\n", | |
| " dataset.insert([item])" | |
| ], | |
| "metadata": { | |
| "id": "s8IpjR9Bm920" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Templates & Prompts" | |
| ], | |
| "metadata": { | |
| "id": "o87OB7Eqe24p" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# menu items\n", | |
| "menu_items = \"\"\"\n", | |
| "Menu: Kids Menu\n", | |
| "Food Item: Mini Cheeseburger\n", | |
| "Price: $6.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 4/5\n", | |
| "Included: Mini beef patty, cheese, lettuce, tomato, and fries.\n", | |
| "\n", | |
| "Menu: Appetizers\n", | |
| "Food Item: Loaded Potato Skins\n", | |
| "Price: $8.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 3/5\n", | |
| "Included: Crispy potato skins filled with cheese, bacon bits, and served with sour cream.\n", | |
| "\n", | |
| "Menu: Appetizers\n", | |
| "Food Item: Bruschetta\n", | |
| "Price: $7.99\n", | |
| "Vegan: Y\n", | |
| "Popularity: 4/5\n", | |
| "Included: Toasted baguette slices topped with fresh tomatoes, basil, garlic, and balsamic glaze.\n", | |
| "\n", | |
| "Menu: Main Menu\n", | |
| "Food Item: Grilled Chicken Caesar Salad\n", | |
| "Price: $12.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 4/5\n", | |
| "Included: Grilled chicken breast, romaine lettuce, Parmesan cheese, croutons, and Caesar dressing.\n", | |
| "\n", | |
| "Menu: Main Menu\n", | |
| "Food Item: Classic Cheese Pizza\n", | |
| "Price: $10.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 5/5\n", | |
| "Included: Thin-crust pizza topped with tomato sauce, mozzarella cheese, and fresh basil.\n", | |
| "\n", | |
| "Menu: Main Menu\n", | |
| "Food Item: Spaghetti Bolognese\n", | |
| "Price: $14.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 4/5\n", | |
| "Included: Pasta tossed in a savory meat sauce made with ground beef, tomatoes, onions, and herbs.\n", | |
| "\n", | |
| "Menu: Vegan Options\n", | |
| "Food Item: Veggie Wrap\n", | |
| "Price: $9.99\n", | |
| "Vegan: Y\n", | |
| "Popularity: 3/5\n", | |
| "Included: Grilled vegetables, hummus, mixed greens, and a wrap served with a side of sweet potato fries.\n", | |
| "\n", | |
| "Menu: Vegan Options\n", | |
| "Food Item: Vegan Beyond Burger\n", | |
| "Price: $11.99\n", | |
| "Vegan: Y\n", | |
| "Popularity: 4/5\n", | |
| "Included: Plant-based patty, vegan cheese, lettuce, tomato, onion, and a choice of regular or sweet potato fries.\n", | |
| "\n", | |
| "Menu: Desserts\n", | |
| "Food Item: Chocolate Lava Cake\n", | |
| "Price: $6.99\n", | |
| "Vegan: N\n", | |
| "Popularity: 5/5\n", | |
| "Included: Warm chocolate cake with a gooey molten center, served with vanilla ice cream.\n", | |
| "\n", | |
| "Menu: Desserts\n", | |
| "Food Item: Fresh Berry Parfait\n", | |
| "Price: $5.99\n", | |
| "Vegan: Y\n", | |
| "Popularity: 4/5\n", | |
| "Included: Layers of mixed berries, granola, and vegan coconut yogurt.\n", | |
| "\"\"\"\n" | |
| ], | |
| "metadata": { | |
| "id": "UddfOiIFe4ja" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "prompt_template = \"\"\"Answer a question about the following menu:\n", | |
| "\n", | |
| "# MENU\n", | |
| "{menu}\n", | |
| "\n", | |
| "# QUESTION\n", | |
| "{question}\n", | |
| "\"\"\"" | |
| ], | |
| "metadata": { | |
| "id": "TP3uUuY9e5jW" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
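| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Optional sanity check: render the prompt template with the menu and a sample question before wiring it into the application. The question below is just an example; this cell makes no API calls." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Preview the fully formatted prompt (no LLM call)\n", | |
| "sample_question = \"Which menu items are vegan?\"\n", | |
| "print(prompt_template.format(menu=menu_items, question=sample_question))" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |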
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "j0viO7Gqe5hJ" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "qFrieyPRe5e-" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "MWL8el1ve5c1" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "xWyIaDwIe5Z4" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "G3-pMk6p92nz" | |
| }, | |
| "source": [ | |
| "# LLM Application\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Simple little client class for using different LLM APIs (OpenAI or LiteLLM)\n", | |
| "class LLMClient:\n", | |
| " def __init__(self, client_type: str =\"openai\", model: str =\"gpt-4\"):\n", | |
| " self.client_type = client_type\n", | |
| " self.model = model\n", | |
| "\n", | |
| " if self.client_type == \"openai\":\n", | |
| " self.client = track_openai(openai.OpenAI())\n", | |
| "\n", | |
| " else:\n", | |
| " self.client = None\n", | |
| "\n", | |
| " # LiteLLM query function\n", | |
| " def _get_litellm_response(self, query: str, system: str = \"You are a helpful assistant.\"):\n", | |
| " messages = [\n", | |
| " {\"role\": \"system\", \"content\": system },\n", | |
| " { \"role\": \"user\", \"content\": query }\n", | |
| " ]\n", | |
| "\n", | |
| " response = litellm.completion(\n", | |
| " model=self.model,\n", | |
| " messages=messages\n", | |
| " )\n", | |
| "\n", | |
| " return response.choices[0].message.content\n", | |
| "\n", | |
| " # OpenAI query function - use **kwargs to pass arguments like temperature\n", | |
| " def _get_openai_response(self, query: str, system: str = \"You are a helpful assistant.\", **kwargs):\n", | |
| " messages = [\n", | |
| " {\"role\": \"system\", \"content\": system },\n", | |
| " { \"role\": \"user\", \"content\": query }\n", | |
| " ]\n", | |
| "\n", | |
| " response = self.client.chat.completions.create(\n", | |
| " model=self.model,\n", | |
| " messages=messages,\n", | |
| " **kwargs\n", | |
| " )\n", | |
| "\n", | |
| " return response.choices[0].message.content\n", | |
| "\n", | |
| "\n", | |
| " def query(self, query: str, system: str = \"You are a helpful assistant.\", **kwargs):\n", | |
| " if self.client_type == 'openai':\n", | |
| " return self._get_openai_response(query, system, **kwargs)\n", | |
| "\n", | |
| " else:\n", | |
| " return self._get_litellm_response(query, system)\n", | |
| "\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "3rhh1oX6fUTz" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "llm_client = LLMClient(model=MODEL)" | |
| ], | |
| "metadata": { | |
| "id": "w1e6ceRpfZiJ" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
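| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "If you'd prefer an open-source model, you can construct the client with `client_type=\"litellm\"` and any model string LiteLLM supports. The model name below is only an illustration; substitute one you have access to and set whatever credentials that provider requires." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Example (commented out): route requests through LiteLLM instead of the OpenAI client.\n", | |
| "# The model string below is illustrative, not a recommendation.\n", | |
| "# llm_client = LLMClient(client_type=\"litellm\", model=\"ollama/llama3\")" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |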
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "m6s3Rk9u92n0" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "@track\n", | |
| "def chatbot_application(input: str) -> str:\n", | |
| " response = llm_client.query(prompt_template.format(menu=menu_items, question=input))\n", | |
| " return response\n", | |
| "\n" | |
| ] | |
| }, | |
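| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Before evaluating the whole dataset, it can help to smoke-test the application on a single question. This makes one LLM call, so it assumes your API key is configured; the trace should appear in the `food_chatbot_eval` project." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Quick smoke test: a single traced call through the chatbot\n", | |
| "print(chatbot_application(\"Which desserts are vegan?\"))" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |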
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# Evaluation" | |
| ], | |
| "metadata": { | |
| "id": "hkGXuEY33Dc7" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Define the evaluation task\n", | |
| "def evaluation_task(x):\n", | |
| " return {\n", | |
| " \"input\": x['question'],\n", | |
| " \"output\": chatbot_application(x['question']),\n", | |
| " \"context\": menu_items,\n", | |
| " \"reference\": x['response']\n", | |
| " }\n" | |
| ], | |
| "metadata": { | |
| "id": "jd75tlHbf2wg" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
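| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "Optionally, run the task on one hand-written item first to confirm the output dictionary has the fields you expect (`input`, `output`, `context`, `reference`). The item below is made up and just mirrors the dataset schema; this cell makes one LLM call." | |
| ], | |
| "metadata": {} | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Sanity check the task output on a single made-up item before running the full evaluation\n", | |
| "sample_item = {\n", | |
| "    \"index\": \"0\",\n", | |
| "    \"question\": \"Which desserts are vegan?\",\n", | |
| "    \"response\": \"The Fresh Berry Parfait is vegan.\"\n", | |
| "}\n", | |
| "\n", | |
| "task_output = evaluation_task(sample_item)\n", | |
| "print(task_output.keys())" | |
| ], | |
| "metadata": {}, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |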
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "#dataset = client.get_or_create_dataset(name=\"foodchatbot_eval\")" | |
| ], | |
| "metadata": { | |
| "id": "5V7_qLTaf4EI" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Define the metrics\n", | |
| "metrics = [IsJson()]\n", | |
| "\n", | |
| "# experiment_name\n", | |
| "experiment_name = MODEL + \"_\" + dataset.name + \"_\" + datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n", | |
| "\n", | |
| "# run evaluation\n", | |
| "evaluation = evaluate(\n", | |
| " experiment_name=experiment_name,\n", | |
| " dataset=dataset,\n", | |
| " task=evaluation_task,\n", | |
| " scoring_metrics=metrics,\n", | |
| " experiment_config={\n", | |
| " \"model\": MODEL\n", | |
| " }\n", | |
| ")" | |
| ], | |
| "metadata": { | |
| "id": "Iro4ybLof6Q2" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "L3osp2A-f2ti" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "Aw0A5CIGf2sZ" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "u-W0bkn4f2qL" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "DSuvdj2vf2ny" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "BkvQd_QKf2lD" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "comet-eval", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.15" | |
| }, | |
| "colab": { | |
| "provenance": [], | |
| "include_colab_link": true | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |