Skip to content

Instantly share code, notes, and snippets.

@chriscarrollsmith
Last active September 8, 2025 01:02
Show Gist options
  • Select an option

  • Save chriscarrollsmith/42c426f38595c78f3bd8e4bcda76acdc to your computer and use it in GitHub Desktop.

Select an option

Save chriscarrollsmith/42c426f38595c78f3bd8e4bcda76acdc to your computer and use it in GitHub Desktop.
Quick script for inspecting the file_search tool results returned from an OpenAI embeddings vector store.
import json
import os
import sys
from typing import Any, Dict, List, Optional

from openai import OpenAI
def ensure_api_key() -> None:
    """Abort the script unless an OpenAI API key is configured.

    Reads ``OPENAI_API_KEY`` from the environment; an unset or empty value
    is treated as missing.

    Raises:
        SystemExit: With exit status 1 when the key is missing.
    """
    if not os.getenv("OPENAI_API_KEY"):
        # Diagnostics belong on stderr so they stay out of piped stdout.
        print(
            "ERROR: Please set OPENAI_API_KEY in your environment.",
            file=sys.stderr,
        )
        sys.exit(1)
def to_primitive(value: Any) -> Any:
    """Recursively convert *value* into plain Python containers.

    Conversion rules, tried in order:

    1. An object exposing a callable ``model_dump`` is dumped via
       ``model_dump(exclude_none=True)``.
    2. Dicts, lists and tuples are rebuilt with every element converted
       (dict keys are left untouched; tuples come back as plain tuples).
    3. Any other object with a ``__dict__`` becomes a dict of its public
       attributes (names not starting with ``_``), converted recursively.
    4. Everything else is returned unchanged.

    If ``model_dump`` or the attribute walk raises, the failure is ignored
    and the next rule is tried, so the function itself is best-effort.

    Args:
        value: Any object, typically an SDK response or part of one.

    Returns:
        The converted structure, or *value* itself when no rule applies.
    """
    dump = getattr(value, "model_dump", None)
    if callable(dump):
        try:
            return dump(exclude_none=True)
        except Exception:
            # Best effort: a broken model_dump should not prevent the
            # generic fallbacks below from producing something printable.
            pass
    if isinstance(value, dict):
        return {k: to_primitive(v) for k, v in value.items()}
    if isinstance(value, list):
        return [to_primitive(v) for v in value]
    if isinstance(value, tuple):
        # Recurse into tuples too, otherwise objects nested in them would
        # be left unconverted.
        return tuple(to_primitive(v) for v in value)
    # str/bytes subclasses can carry a __dict__; they must stay scalars.
    if hasattr(value, "__dict__") and not isinstance(value, (str, bytes)):
        try:
            return {
                k: to_primitive(v)
                for k, v in value.__dict__.items()
                if not k.startswith("_")
            }
        except Exception:
            # Best effort: fall back to returning the object untouched.
            pass
    return value
def get_assistant_vector_store_ids(client: "OpenAI", assistant_id: str) -> List[str]:
    """Return the vector store IDs attached to an assistant's file_search tool.

    Args:
        client: OpenAI client; only ``client.beta.assistants.retrieve`` is
            called on it.
        assistant_id: ID of the assistant to inspect. A falsy value
            (``None`` or ``""``) yields an empty list without any API call.

    Returns:
        The entries of ``tool_resources.file_search.vector_store_ids``, each
        coerced to ``str``. An empty list is returned when the assistant
        cannot be retrieved, or when any link of that attribute chain is
        missing or the final value is not a list.
    """
    if not assistant_id:
        return []
    try:
        assistant = client.beta.assistants.retrieve(assistant_id)
    except Exception as exc:
        # Still best-effort (the caller decides what an empty result means),
        # but say why the lookup failed instead of hiding it.
        print(
            f"WARNING: could not retrieve assistant {assistant_id!r}: {exc}",
            file=sys.stderr,
        )
        return []

    # getattr with a default tolerates a missing link anywhere in the chain.
    tool_resources = getattr(assistant, "tool_resources", None)
    file_search = getattr(tool_resources, "file_search", None)
    vector_store_ids = getattr(file_search, "vector_store_ids", None)
    if not isinstance(vector_store_ids, list):
        return []
    return [str(v) for v in vector_store_ids]
def _first_present(mapping: Dict[str, Any], *keys: str) -> Any:
    """Return the first value in *mapping* under *keys* that is not ``None``."""
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return None


def _extract_snippet(content: Any, max_chars: int = 400) -> Optional[str]:
    """Pull a display snippet out of a search result's ``content`` field.

    Accepts either a bare string, or a list whose first element is a dict
    holding ``text`` as a string or as a dict with a string ``value``.
    Returns ``None`` when no text is found; text longer than *max_chars* is
    cut to that length and suffixed with ``"..."``.
    """
    text = None
    if isinstance(content, str):
        text = content
    elif isinstance(content, list) and content and isinstance(content[0], dict):
        maybe_text = content[0].get("text")
        if isinstance(maybe_text, str):
            text = maybe_text
        elif isinstance(maybe_text, dict):
            value = maybe_text.get("value")
            text = value if isinstance(value, str) else None
    if text is not None and len(text) > max_chars:
        return text[:max_chars] + "..."
    return text


def main() -> None:
    """Run one vector store search and print the raw and compact results.

    Environment variables:
        OPENAI_API_KEY: Required.
        VECTOR_STORE_ID: Vector store to search. When unset, the first store
            attached to the assistant named by ASSISTANT_ID is used.
        ASSISTANT_ID: Only consulted when VECTOR_STORE_ID is unset.

    The query is ``sys.argv[1]`` when given, otherwise ``"clean cooking"``.

    Raises:
        SystemExit: Status 1 when the API key is missing or no vector store
            can be resolved; status 2 when the installed SDK has no
            ``client.vector_stores.search``.
    """
    ensure_api_key()
    client = OpenAI()

    vector_store_id = os.getenv("VECTOR_STORE_ID")
    if not vector_store_id:
        assistant_id = os.getenv("ASSISTANT_ID")
        # Without an assistant ID there is nothing to look up; skip the call.
        vs_ids = (
            get_assistant_vector_store_ids(client, assistant_id)
            if assistant_id
            else []
        )
        if not vs_ids:
            print(
                "No vector stores found on assistant and VECTOR_STORE_ID not set.",
                file=sys.stderr,
            )
            sys.exit(1)
        vector_store_id = vs_ids[0]

    query = sys.argv[1] if len(sys.argv) > 1 else "clean cooking"
    print(f"Using vector_store_id: {vector_store_id}")
    print(f"Query: {query}")

    # Perform direct vector store search (requires an SDK that supports it).
    # Nested getattr also covers clients that lack `vector_stores` entirely.
    search = getattr(getattr(client, "vector_stores", None), "search", None)
    if not callable(search):
        print(
            "This SDK version does not support client.vector_stores.search. "
            "Try upgrading openai.",
            file=sys.stderr,
        )
        sys.exit(2)

    try:
        results = search(
            vector_store_id=vector_store_id,
            query=query,
            max_num_results=5,
        )
    except TypeError:
        # Some SDK versions use `limit` instead of `max_num_results`.
        results = search(
            vector_store_id=vector_store_id,
            query=query,
            limit=5,
        )

    # Print the raw response; fall back to str() if it is not JSON-encodable.
    prim = to_primitive(results)
    print("\nRaw search response shape:")
    try:
        print(json.dumps(prim, indent=2))
    except (TypeError, ValueError):
        print(str(prim))

    # If there is a `data` list of chunk-like entries, print a compact view.
    data = getattr(results, "data", None) or (
        prim.get("data") if isinstance(prim, dict) else None
    )
    if isinstance(data, list) and data:
        print("\nTop chunks (compact view):")
        for idx, item in enumerate(data, start=1):
            item_prim = to_primitive(item)
            if not isinstance(item_prim, dict):
                # Unknown entry shape: show it verbatim rather than crash.
                print(f"{idx}. {item_prim!r}")
                continue
            # Compare against None so a legitimate score of 0.0 is kept.
            file_id = _first_present(item_prim, "file_id", "fileId")
            score = _first_present(item_prim, "score", "similarity")
            snippet = _extract_snippet(item_prim.get("content"))
            print(f"{idx}. file_id={file_id} score={score}\n text={snippet}")
if __name__ == "__main__":
    # Run the search only when executed as a script, not when imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment