Created
December 4, 2025 22:19
-
-
Save kuc-arc-f/bc75332da0058716e2c5826b9456d8bf to your computer and use it in GitHub Desktop.
ADK Python: add RAG data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Load text files into a pgvector-backed ``documents`` table for RAG.

Every ``*.txt`` file directly under ``folder_path`` is read as UTF-8, split
into overlapping chunks, embedded through Ollama, and inserted into
PostgreSQL as ``(content, embedding)`` rows.
"""
import glob
import os

from langchain_text_splitters import RecursiveCharacterTextSplitter
import ollama
import psycopg2
from pgvector.psycopg2 import register_vector

folder_path = "./data"

# In-memory record of every chunk that was processed (filename + text).
# Nothing below reads it; it is kept for inspection/debugging.
data = []

# Splitter initialisation. Built before connecting so that a failure here
# cannot leave a database connection open.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # target number of characters per chunk
    chunk_overlap=20,  # characters shared with the neighbouring chunk (keeps context)
    length_function=len,  # function used to measure length
    separators=["\n\n", "\n", " ", ""],  # split points, in order of preference
)

# PostgreSQL connection.
# NOTE(review): credentials are hard-coded; consider moving them out of the
# source before sharing or deploying this script.
conn = psycopg2.connect(
    dbname="mydb",
    user="root",
    password="admin",
    host="localhost",
    port=5432,
)

try:
    # Register the pgvector type with psycopg2.
    register_vector(conn)

    # glob returns matches in arbitrary order; sort for a reproducible run.
    for file_path in sorted(glob.glob(os.path.join(folder_path, "*.txt"))):
        with open(file_path, "r", encoding="utf-8") as f:
            target_content = f.read()

        # Run the split.
        chunks = text_splitter.create_documents([target_content])
        filename = os.path.basename(file_path)

        # One cursor per file instead of one per chunk.
        with conn.cursor() as cur:
            for i, chunk in enumerate(chunks):
                content = chunk.page_content
                print(f"Chunk {i}: {content}")
                data.append({
                    "filename": filename,
                    "content": content,
                })

                embedding = ollama.embeddings(
                    model="qwen3-embedding:0.6b",
                    prompt=content,
                )
                vec = embedding["embedding"]
                print(len(vec))
                print(vec[:5])

                # Insert into SQL.
                cur.execute(
                    "INSERT INTO documents (content, embedding) VALUES (%s, %s)",
                    (content, vec),
                )

        # Commit once per file so a failure part-way through a file does not
        # leave that file half-ingested.
        conn.commit()
finally:
    # Always release the connection. Closing without a commit discards any
    # pending (uncommitted) inserts, so no explicit rollback is needed.
    conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment