Created
December 12, 2024 05:12
-
-
Save nesheep5/d5c89bc26b15d30a379715e47e3ac2dd to your computer and use it in GitHub Desktop.
rag_sample.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "toc_visible": true, | |
| "authorship_tag": "ABX9TyP1Eii7PSvsENV0s1p8mMwL", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/nesheep5/d5c89bc26b15d30a379715e47e3ac2dd/rag_sample.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "# 社内ドキュメントを検索するRAGのサンプルコード\n", | |
| "\n", | |
| "参考記事 : https://zenn.dev/umi_mori/books/llm-rag-langchain-python/viewer/rag-flow\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "z-sU2pTefNeI" | |
| } | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "準備 : 社内ドキュメント(就業規則ファイル)の作成" | |
| ], | |
| "metadata": { | |
| "id": "WzG6Igy11dPx" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Destination of the sample internal document (the work-rules file).\n", | |
| "file_path = \"work_rules.md\"\n", | |
| "\n", | |
| "# Text of the document; it is written to disk verbatim below.\n", | |
| "content = \"\"\"# 就業規則\n", | |
| "・この就業規則は、株式会社Galirage(以下、「当社」という)の全従業員に適用され、従業員が遵守すべき基本的なルールと指針を定めるものです。\n", | |
| "・基本勤務時間は、午前9時から午後3時までとします。休憩時間は12時から1時までの1時間です。\n", | |
| "・時間外労働については、事前に上司の承認を必要とします。\n", | |
| "・定休日は土曜日、日曜日、および国が定める祝日です。\n", | |
| "・年次有給休暇は、入社半年後に10日間付与されます。以後、勤続年数に応じて加算されます。\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "# Create (or overwrite) the file and write the document as UTF-8.\n", | |
| "with open(file_path, mode=\"w\", encoding=\"utf-8\") as file:\n", | |
| "    file.write(content)\n", | |
| "\n", | |
| "# Report which file was created.\n", | |
| "print(f\"{file_path} ファイルが作成されました。\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "F2Coq8QH1XaR", | |
| "outputId": "911902ee-7dd0-455b-bf77-6cbc73687581" | |
| }, | |
| "execution_count": 1, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "work_rules.md ファイルが作成されました。\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "必要なライブラリをインストール" | |
| ], | |
| "metadata": { | |
| "id": "pmZh3VCifH7M" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "!pip install langchain\n", | |
| "!pip install langchain-core\n", | |
| "!pip install langchain-community\n", | |
| "!pip install langchain-openai\n", | |
| "!pip install docarray\n", | |
| "\n", | |
| "# Pin openai/httpx to work around the openai Client.__init__ 'proxies' keyword-argument error:\n", | |
| "# https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332/23\n", | |
| "# The typing-extensions specifier is quoted on purpose: unquoted, bash parses '>' and '<' as\n", | |
| "# redirections, fails with '5.0.0: No such file or directory', and never runs pip, so no pin is applied.\n", | |
| "!pip install openai==1.55.3 httpx==0.27.2 'typing-extensions>=4.11.0,<5.0.0' --force-reinstall --quiet" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "collapsed": true, | |
| "id": "1jc0g6sm2upE", | |
| "outputId": "4dfa5e83-d6cf-4209-a875-30cdf72f72dd" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Requirement already satisfied: langchain in /usr/local/lib/python3.10/dist-packages (0.3.9)\n", | |
| "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.2)\n", | |
| "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.36)\n", | |
| "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.11.9)\n", | |
| "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", | |
| "Requirement already satisfied: langchain-core<0.4.0,>=0.3.21 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.3.21)\n", | |
| "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.3.2)\n", | |
| "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /usr/local/lib/python3.10/dist-packages (from langchain) (0.1.147)\n", | |
| "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.26.4)\n", | |
| "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.10.3)\n", | |
| "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.32.3)\n", | |
| "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (9.0.0)\n", | |
| "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", | |
| "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", | |
| "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.2.0)\n", | |
| "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", | |
| "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n", | |
| "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n", | |
| "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", | |
| "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain) (1.33)\n", | |
| "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain) (24.2)\n", | |
| "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain) (4.12.2)\n", | |
| "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.17->langchain) (0.28.0)\n", | |
| "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.12)\n", | |
| "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.1)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.4.0)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.10)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.2.3)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2024.8.30)\n", | |
| "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n", | |
| "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (3.7.1)\n", | |
| "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.0.7)\n", | |
| "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (0.14.0)\n", | |
| "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.21->langchain) (3.0.0)\n", | |
| "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.3.1)\n", | |
| "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.2.2)\n", | |
| "Requirement already satisfied: langchain-core in /usr/local/lib/python3.10/dist-packages (0.3.21)\n", | |
| "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (6.0.2)\n", | |
| "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (1.33)\n", | |
| "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (0.1.147)\n", | |
| "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (24.2)\n", | |
| "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (2.10.3)\n", | |
| "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (9.0.0)\n", | |
| "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.10/dist-packages (from langchain-core) (4.12.2)\n", | |
| "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core) (3.0.0)\n", | |
| "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (0.28.0)\n", | |
| "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (3.10.12)\n", | |
| "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (2.32.3)\n", | |
| "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (1.0.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core) (2.27.1)\n", | |
| "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (3.7.1)\n", | |
| "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (2024.8.30)\n", | |
| "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (1.0.7)\n", | |
| "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (3.10)\n", | |
| "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (0.14.0)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core) (3.4.0)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core) (2.2.3)\n", | |
| "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (1.3.1)\n", | |
| "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (1.2.2)\n", | |
| "Collecting langchain-community\n", | |
| " Downloading langchain_community-0.3.11-py3-none-any.whl.metadata (2.9 kB)\n", | |
| "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (6.0.2)\n", | |
| "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.36)\n", | |
| "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.11.9)\n", | |
| "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)\n", | |
| " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", | |
| "Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)\n", | |
| " Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)\n", | |
| "Collecting langchain<0.4.0,>=0.3.11 (from langchain-community)\n", | |
| " Downloading langchain-0.3.11-py3-none-any.whl.metadata (7.1 kB)\n", | |
| "Collecting langchain-core<0.4.0,>=0.3.24 (from langchain-community)\n", | |
| " Downloading langchain_core-0.3.24-py3-none-any.whl.metadata (6.3 kB)\n", | |
| "Requirement already satisfied: langsmith<0.3,>=0.1.125 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.1.147)\n", | |
| "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (1.26.4)\n", | |
| "Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)\n", | |
| " Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)\n", | |
| "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.32.3)\n", | |
| "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (9.0.0)\n", | |
| "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (2.4.4)\n", | |
| "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n", | |
| "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n", | |
| "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (24.2.0)\n", | |
| "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.5.0)\n", | |
| "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.1.0)\n", | |
| "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (0.2.1)\n", | |
| "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.18.3)\n", | |
| "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", | |
| " Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)\n", | |
| "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", | |
| " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", | |
| "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from langchain<0.4.0,>=0.3.11->langchain-community) (0.3.2)\n", | |
| "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.10/dist-packages (from langchain<0.4.0,>=0.3.11->langchain-community) (2.10.3)\n", | |
| "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.24->langchain-community) (1.33)\n", | |
| "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.24->langchain-community) (24.2)\n", | |
| "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.24->langchain-community) (4.12.2)\n", | |
| "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.3,>=0.1.125->langchain-community) (0.28.0)\n", | |
| "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.3,>=0.1.125->langchain-community) (3.10.12)\n", | |
| "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.3,>=0.1.125->langchain-community) (1.0.0)\n", | |
| "Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)\n", | |
| " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.4.0)\n", | |
| "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.10)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2.2.3)\n", | |
| "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2024.8.30)\n", | |
| "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.1.1)\n", | |
| "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.125->langchain-community) (3.7.1)\n", | |
| "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.125->langchain-community) (1.0.7)\n", | |
| "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.125->langchain-community) (0.14.0)\n", | |
| "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.24->langchain-community) (3.0.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.11->langchain-community) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.11->langchain-community) (2.27.1)\n", | |
| "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)\n", | |
| " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", | |
| "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.125->langchain-community) (1.3.1)\n", | |
| "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.125->langchain-community) (1.2.2)\n", | |
| "Downloading langchain_community-0.3.11-py3-none-any.whl (2.5 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m28.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", | |
| "Downloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)\n", | |
| "Downloading langchain-0.3.11-py3-none-any.whl (1.0 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading langchain_core-0.3.24-py3-none-any.whl (410 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading pydantic_settings-2.6.1-py3-none-any.whl (28 kB)\n", | |
| "Downloading marshmallow-3.23.1-py3-none-any.whl (49 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", | |
| "Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", | |
| "Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", | |
| "Installing collected packages: python-dotenv, mypy-extensions, marshmallow, httpx-sse, typing-inspect, pydantic-settings, dataclasses-json, langchain-core, langchain, langchain-community\n", | |
| " Attempting uninstall: langchain-core\n", | |
| " Found existing installation: langchain-core 0.3.21\n", | |
| " Uninstalling langchain-core-0.3.21:\n", | |
| " Successfully uninstalled langchain-core-0.3.21\n", | |
| " Attempting uninstall: langchain\n", | |
| " Found existing installation: langchain 0.3.9\n", | |
| " Uninstalling langchain-0.3.9:\n", | |
| " Successfully uninstalled langchain-0.3.9\n", | |
| "Successfully installed dataclasses-json-0.6.7 httpx-sse-0.4.0 langchain-0.3.11 langchain-community-0.3.11 langchain-core-0.3.24 marshmallow-3.23.1 mypy-extensions-1.0.0 pydantic-settings-2.6.1 python-dotenv-1.0.1 typing-inspect-0.9.0\n", | |
| "Collecting langchain-openai\n", | |
| " Downloading langchain_openai-0.2.12-py3-none-any.whl.metadata (2.7 kB)\n", | |
| "Requirement already satisfied: langchain-core<0.4.0,>=0.3.21 in /usr/local/lib/python3.10/dist-packages (from langchain-openai) (0.3.24)\n", | |
| "Collecting openai<2.0.0,>=1.55.3 (from langchain-openai)\n", | |
| " Downloading openai-1.57.2-py3-none-any.whl.metadata (24 kB)\n", | |
| "Collecting tiktoken<1,>=0.7 (from langchain-openai)\n", | |
| " Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", | |
| "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (6.0.2)\n", | |
| "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (1.33)\n", | |
| "Requirement already satisfied: langsmith<0.3,>=0.1.125 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (0.1.147)\n", | |
| "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (24.2)\n", | |
| "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (2.10.3)\n", | |
| "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (9.0.0)\n", | |
| "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.4.0,>=0.3.21->langchain-openai) (4.12.2)\n", | |
| "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (3.7.1)\n", | |
| "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (1.9.0)\n", | |
| "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (0.28.0)\n", | |
| "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (0.8.0)\n", | |
| "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (1.3.1)\n", | |
| "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.55.3->langchain-openai) (4.66.6)\n", | |
| "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<1,>=0.7->langchain-openai) (2024.9.11)\n", | |
| "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken<1,>=0.7->langchain-openai) (2.32.3)\n", | |
| "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.55.3->langchain-openai) (3.10)\n", | |
| "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.55.3->langchain-openai) (1.2.2)\n", | |
| "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.55.3->langchain-openai) (2024.8.30)\n", | |
| "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.55.3->langchain-openai) (1.0.7)\n", | |
| "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.55.3->langchain-openai) (0.14.0)\n", | |
| "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.21->langchain-openai) (3.0.0)\n", | |
| "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.3,>=0.1.125->langchain-core<0.4.0,>=0.3.21->langchain-openai) (3.10.12)\n", | |
| "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.3,>=0.1.125->langchain-core<0.4.0,>=0.3.21->langchain-openai) (1.0.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.21->langchain-openai) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.21->langchain-openai) (2.27.1)\n", | |
| "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain-openai) (3.4.0)\n", | |
| "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain-openai) (2.2.3)\n", | |
| "Downloading langchain_openai-0.2.12-py3-none-any.whl (50 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.7/50.7 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading openai-1.57.2-py3-none-any.whl (389 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m389.9/389.9 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hInstalling collected packages: tiktoken, openai, langchain-openai\n", | |
| " Attempting uninstall: openai\n", | |
| " Found existing installation: openai 1.54.5\n", | |
| " Uninstalling openai-1.54.5:\n", | |
| " Successfully uninstalled openai-1.54.5\n", | |
| "Successfully installed langchain-openai-0.2.12 openai-1.57.2 tiktoken-0.8.0\n", | |
| "Collecting docarray\n", | |
| " Downloading docarray-0.40.0-py3-none-any.whl.metadata (36 kB)\n", | |
| "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from docarray) (1.26.4)\n", | |
| "Requirement already satisfied: orjson>=3.8.2 in /usr/local/lib/python3.10/dist-packages (from docarray) (3.10.12)\n", | |
| "Requirement already satisfied: pydantic>=1.10.8 in /usr/local/lib/python3.10/dist-packages (from docarray) (2.10.3)\n", | |
| "Requirement already satisfied: rich>=13.1.0 in /usr/local/lib/python3.10/dist-packages (from docarray) (13.9.4)\n", | |
| "Collecting types-requests>=2.28.11.6 (from docarray)\n", | |
| " Downloading types_requests-2.32.0.20241016-py3-none-any.whl.metadata (1.9 kB)\n", | |
| "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from docarray) (0.9.0)\n", | |
| "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10.8->docarray) (0.7.0)\n", | |
| "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10.8->docarray) (2.27.1)\n", | |
| "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10.8->docarray) (4.12.2)\n", | |
| "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=13.1.0->docarray) (3.0.0)\n", | |
| "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=13.1.0->docarray) (2.18.0)\n", | |
| "Requirement already satisfied: urllib3>=2 in /usr/local/lib/python3.10/dist-packages (from types-requests>=2.28.11.6->docarray) (2.2.3)\n", | |
| "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect>=0.8.0->docarray) (1.0.0)\n", | |
| "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=13.1.0->docarray) (0.1.2)\n", | |
| "Downloading docarray-0.40.0-py3-none-any.whl (270 kB)\n", | |
| "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m270.2/270.2 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
| "\u001b[?25hDownloading types_requests-2.32.0.20241016-py3-none-any.whl (15 kB)\n", | |
| "Installing collected packages: types-requests, docarray\n", | |
| "Successfully installed docarray-0.40.0 types-requests-2.32.0.20241016\n", | |
| "/bin/bash: line 1: 5.0.0: No such file or directory\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "就業規則ファイルのロード、分割" | |
| ], | |
| "metadata": { | |
| "id": "vKjTDSAmgkgf" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from google.colab import userdata\n", | |
| "from langchain_text_splitters import CharacterTextSplitter\n", | |
| "from langchain_community.document_loaders import TextLoader\n", | |
| "\n", | |
| "\n", | |
| "loader = TextLoader(\"./work_rules.md\", encoding='utf8')\n", | |
| "documents = loader.load()\n", | |
| "\n", | |
| "text_splitter = CharacterTextSplitter(separator=\"\\n\", chunk_size=10, chunk_overlap=0)\n", | |
| "docs = text_splitter.split_documents(documents)\n", | |
| "\n", | |
| "print(len(docs))\n", | |
| "for doc in docs:\n", | |
| " print(doc.page_content)\n", | |
| "\n" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "collapsed": true, | |
| "id": "zptSotUK1owt", | |
| "outputId": "86f1260d-2324-47b3-8aa8-ad052e5a24b5" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "WARNING:langchain_text_splitters.base:Created a chunk of size 72, which is longer than the specified 10\n", | |
| "WARNING:langchain_text_splitters.base:Created a chunk of size 47, which is longer than the specified 10\n", | |
| "WARNING:langchain_text_splitters.base:Created a chunk of size 28, which is longer than the specified 10\n", | |
| "WARNING:langchain_text_splitters.base:Created a chunk of size 26, which is longer than the specified 10\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "6\n", | |
| "# 就業規則\n", | |
| "・この就業規則は、株式会社Galirage(以下、「当社」という)の全従業員に適用され、従業員が遵守すべき基本的なルールと指針を定めるものです。\n", | |
| "・基本勤務時間は、午前9時から午後3時までとします。休憩時間は12時から1時までの1時間です。\n", | |
| "・時間外労働については、事前に上司の承認を必要とします。\n", | |
| "・定休日は土曜日、日曜日、および国が定める祝日です。\n", | |
| "・年次有給休暇は、入社半年後に10日間付与されます。以後、勤続年数に応じて加算されます。\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "ドキュメントの埋め込み(Embedding)" | |
| ], | |
| "metadata": { | |
| "id": "tT1k58irg3b5" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import os\n", | |
| "\n", | |
| "from langchain_openai import OpenAIEmbeddings\n", | |
| "from langchain_community.vectorstores import DocArrayInMemorySearch\n", | |
| "\n", | |
| "os.environ[\"OPENAI_API_KEY\"] = userdata.get('OPENAI_API_KEY')\n", | |
| "\n", | |
| "embeddings = OpenAIEmbeddings()\n", | |
| "db = DocArrayInMemorySearch.from_documents(docs, embeddings)\n", | |
| "retriever = db.as_retriever()" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "collapsed": true, | |
| "id": "a4w4GBV_4_Tj", | |
| "outputId": "3e5e90bb-6460-4d4b-9fc1-e64b598af503" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stderr", | |
| "text": [ | |
| "/usr/local/lib/python3.10/dist-packages/pydantic/_migration.py:283: UserWarning: `pydantic.error_wrappers:ValidationError` has been moved to `pydantic:ValidationError`.\n", | |
| " warnings.warn(f'`{import_path}` has been moved to `{new_location}`.')\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import os\n", | |
| "\n", | |
| "from langchain_core.prompts import ChatPromptTemplate\n", | |
| "from langchain_openai import ChatOpenAI\n", | |
| "from langchain_core.output_parsers import StrOutputParser\n", | |
| "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n", | |
| "\n", | |
| "# import langchain\n", | |
| "# langchain.debug = True\n", | |
| "\n", | |
| "template = \"\"\"\n", | |
| "# ゴール\n", | |
| "私は、就業規則の文章と質問を提供します。\n", | |
| "あなたは、就業規則に基づいて、質問に対する回答を生成してください。\n", | |
| "\n", | |
| "# 質問\n", | |
| "{question}\n", | |
| "\n", | |
| "# 就業規則\n", | |
| "{context}\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "prompt = ChatPromptTemplate.from_template(template)\n", | |
| "llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.9)\n", | |
| "output_parser = StrOutputParser()\n", | |
| "setup_and_retrieval = RunnableParallel(\n", | |
| " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", | |
| ")\n", | |
| "\n", | |
| "chain = setup_and_retrieval | prompt | llm | output_parser\n", | |
| "\n", | |
| "res_1 = chain.invoke(\"基本勤務時間は、何時から、何時ですか? 就業規則より該当箇所の引用も合わせて表示してください\")\n", | |
| "print(res_1)" | |
| ], | |
| "metadata": { | |
| "id": "T75nL7JlK00H", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "outputId": "30c6d82c-b5ab-4695-fe63-6784e6706ac3" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "基本勤務時間は、午前9時から午後3時までとされています。休憩時間は12時から1時までの1時間です。\n", | |
| "\n", | |
| "引用:「・基本勤務時間は、午前9時から午後3時までとします。休憩時間は12時から1時までの1時間です。」\n" | |
| ] | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment