Skip to content

Instantly share code, notes, and snippets.

@m77so
Created May 25, 2025 06:12
Show Gist options
  • Select an option

  • Save m77so/cb30aa283cde4bb93d536796eb9e9a12 to your computer and use it in GitHub Desktop.

Select an option

Save m77so/cb30aa283cde4bb93d536796eb9e9a12 to your computer and use it in GitHub Desktop.

右上のアイコン->設定->データコントロール->エクスポートする

conversations.jsonを保存する

このスクリプトを、conversations.json と同じディレクトリで実行する

#!/usr/bin/env python
"""
conversations.json → 日次 API コスト試算
* モデル別 token
* Web Search クエリをカウントして課金
"""
import json, sys, pathlib, re, collections, zoneinfo
from datetime import datetime
import tiktoken, pandas as pd
# ── Unit prices ────────────────────────────────────────────────────
# Token prices in USD per 1M tokens, keyed by model slug:
# "inp" is applied to user-message tokens, "out" to assistant-message tokens.
PRICE = {
    "gpt-4o": {"inp": 5.00, "out": 20.00},
    "gpt-4o-mini": {"inp": 0.60, "out": 2.40},
    "o3": {"inp": 10.00, "out": 40.00},
    "o4-mini": {"inp": 1.10, "out": 4.40},
}
# Web Search charge in USD per query, keyed by model slug.
SEARCH_RATE = {
    "gpt-4o": 0.030,  # 1 query ≈ $0.03
    "o3": 0.030,
    "gpt-4o-mini": 0.0275,
    "o4-mini": 0.0275,
}
# ──────────────────────────────────────────────────────────────────
# Timezone used to bucket message timestamps into calendar days.
JST = zoneinfo.ZoneInfo("Asia/Tokyo")
# Tokenizer used for every token count in this script. All priced models
# (gpt-4o, gpt-4o-mini, o3, o4-mini) tokenize with o200k_base; cl100k_base
# belongs to the older GPT-4 / GPT-3.5 family and miscounts their tokens.
try:
    enc = tiktoken.get_encoding("o200k_base")
except ValueError:
    # tiktoken releases that predate o200k_base reject the name; fall back to
    # the previous encoding so the script still produces an estimate.
    enc = tiktoken.get_encoding("cl100k_base")
def daykey(ts):
    """Format an epoch timestamp as a ``YYYY-MM-DD`` day string in the JST zone."""
    localized = datetime.fromtimestamp(ts, tz=JST)
    return localized.strftime("%Y-%m-%d")
def get_model_slug(msg):
    """Return the model slug stored in a message's metadata, or ``None``.

    ``metadata["model_slug"]`` is preferred; ``metadata["default_model_slug"]``
    is used when the former is missing or empty.

    Args:
        msg: A message dict, or ``None``.

    Returns:
        The slug string, or ``None`` when the message carries no slug.
    """
    # ``or {}`` (rather than a .get default) also covers an explicit
    # ``"metadata": null`` entry, which would otherwise raise AttributeError.
    metadata = (msg or {}).get("metadata") or {}
    return metadata.get("model_slug") or metadata.get("default_model_slug")
# --- Search query detection ----------------------------------------
# Matches a JSON key named "q" (quoted name followed by a colon), so a string
# *value* that happens to be "q" is not mistaken for another query.
_SEARCH_QUERY_KEY_RE = re.compile(r'"q"\s*:')


def count_search_queries(raw: str) -> int:
    """Count the web-search queries contained in a message text.

    The text is only inspected when it contains the literal ``"search_query"``;
    the result is then the number of ``"q":`` keys found in it.

    Args:
        raw: Message text; an empty or ``None`` value yields 0.

    Returns:
        The number of ``"q"`` keys, or 0 when the text has no ``"search_query"``.
    """
    if not raw or '"search_query"' not in raw:
        return 0
    return len(_SEARCH_QUERY_KEY_RE.findall(raw))
def find_parent_model(node_id, mapping, default="gpt-4o"):
    """Find the model slug of the nearest assistant ancestor of a node.

    Follows ``parent`` links upward from ``node_id`` and returns the slug of
    the first assistant message that carries one.

    Args:
        node_id: Id of the node to start from (the node itself is not checked).
        mapping: Dict of node id -> node dict with ``parent`` and ``message``.
        default: Slug returned when no assistant ancestor has a slug.

    Returns:
        The ancestor's model slug, or ``default``.
    """
    visited = set()  # guards against a malformed, cyclic parent chain
    current = node_id
    while current and current not in visited:
        visited.add(current)
        # ``or {}`` tolerates missing nodes as well as explicit null entries.
        parent_id = (mapping.get(current) or {}).get("parent")
        if not parent_id:
            break
        parent_msg = (mapping.get(parent_id) or {}).get("message") or {}
        if (parent_msg.get("author") or {}).get("role") == "assistant":
            slug = get_model_slug(parent_msg)
            if slug:
                return slug
        current = parent_id
    return default
# ------------------------------------------------------------------
def _message_text(content):
    """Return the countable text of a message ``content`` dict ("" if none)."""
    parts = content.get("parts")
    if isinstance(parts, list):
        # Keep only string parts: non-string entries (presumably image/asset
        # dicts in multimodal messages — verify against your export) have no
        # text to tokenize and would make str.join raise TypeError.
        return "\n".join(part for part in parts if isinstance(part, str))
    text = content.get("text")
    return text if isinstance(text, str) else ""


# Optional first CLI argument overrides the export path; the default keeps
# reading conversations.json from the current working directory.
export_path = pathlib.Path(sys.argv[1] if len(sys.argv) > 1 else "conversations.json")
# Decode explicitly as UTF-8 so the platform locale encoding is never used.
data = json.loads(export_path.read_text(encoding="utf-8"))

# day -> model slug -> [input tokens, output tokens]
tok_tot = collections.defaultdict(lambda: collections.defaultdict(lambda: [0, 0]))
web_cost = collections.defaultdict(float)  # day -> USD

for conv in data:
    mapping = conv.get("mapping") or {}
    for nid, node in mapping.items():
        msg = (node or {}).get("message") or {}
        role = (msg.get("author") or {}).get("role")
        if role not in ("user", "assistant"):
            continue
        # Fall back to the conversation's timestamp when the message has none.
        ts = msg.get("create_time") or conv.get("create_time")
        if ts is None:
            continue
        dkey = daykey(ts)

        # Message text and its token count. disallowed_special=() makes text
        # such as "<|endoftext|>" encode as ordinary characters instead of
        # raising ValueError.
        raw = _message_text(msg.get("content") or {})
        tokens = len(enc.encode(raw, disallowed_special=()))

        # Model: user messages without a slug inherit it from an ancestor;
        # anything without a known price is billed as gpt-4o.
        slug = get_model_slug(msg)
        if role == "user" and not slug:
            slug = find_parent_model(nid, mapping)
        slug = slug if slug in PRICE else "gpt-4o"

        io = 0 if role == "user" else 1  # index 0 = input, 1 = output
        tok_tot[dkey][slug][io] += tokens

        # Assistant messages that contain search queries are billed per query.
        if role == "assistant":
            q = count_search_queries(raw)
            if q:
                web_cost[dkey] += q * SEARCH_RATE.get(slug, 0.03)

# Build the DataFrame: one row per (day, model) plus one web_search row per day.
rows = []
for d, models in tok_tot.items():
    for slug, (in_tok, out_tok) in models.items():
        p = PRICE[slug]
        cost = in_tok / 1e6 * p["inp"] + out_tok / 1e6 * p["out"]
        rows.append(dict(date=d, model=slug,
                         input_tok=in_tok, output_tok=out_tok, cost=cost))
for d, c in web_cost.items():
    rows.append(dict(date=d, model="web_search",
                     input_tok=0, output_tok=0, cost=c))

# Explicit columns keep sort_values/groupby working when there are no rows.
df = pd.DataFrame(
    rows, columns=["date", "model", "input_tok", "output_tok", "cost"]
).sort_values(["date", "model"])

# Daily totals
day_sum = df.groupby("date")["cost"].sum().reset_index()
print("\n### 日次コスト (USD)")
print(day_sum.to_string(index=False, formatters={"cost": "${:,.4f}".format}))
print("\n### 詳細(モデル別)")
print(df.to_string(index=False, formatters={
    "input_tok": "{:,}".format,
    "output_tok": "{:,}".format,
    "cost": "${:,.4f}".format}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment