|
#!/usr/bin/env bash |
|
# recall-search — Search Claude Code conversation history using DuckDB |
|
# |
|
# Usage: |
|
# recall-search search <keywords> Search user messages for keywords |
|
# recall-search sessions List all sessions (most recent first) |
|
# recall-search spine <session-id> Extract conversation spine (user + assistant text) |
|
# recall-search context <session-id> Compact context: first 10 + last 20 messages |
|
# recall-search topics Extract topics from all sessions |
|
# recall-search stats Project-level usage dashboard |
|
# recall-search impact [N] Most impactful sessions by engagement |
|
# |
|
# Environment: |
|
# CLAUDE_PROJECT_DIR Override auto-detected project dir (default: cwd-based) |
|
# RECALL_LIMIT Max results for search/sessions (default: 20) |
|
# |
|
# Prerequisites: |
|
# - DuckDB (brew install duckdb / apt install duckdb) |
|
# - Claude Code conversation history at ~/.claude/projects/ |
|
|
|
set -euo pipefail |
|
|
|
# Resolve project directory from cwd |
|
CWD="${CLAUDE_PROJECT_DIR:-$(pwd)}" |
|
PROJECT_SLUG=$(echo "$CWD" | sed 's|/|-|g') |
|
HISTORY_DIR="$HOME/.claude/projects/$PROJECT_SLUG" |
|
LIMIT="${RECALL_LIMIT:-20}" |
|
|
|
if [ ! -d "$HISTORY_DIR" ]; then |
|
echo "No conversation history at $HISTORY_DIR" >&2 |
|
exit 1 |
|
fi |
|
|
|
FILE_COUNT=$(ls "$HISTORY_DIR"/*.jsonl 2>/dev/null | wc -l | tr -d ' ') |
|
if [ "$FILE_COUNT" = "0" ]; then |
|
echo "No .jsonl files in $HISTORY_DIR" >&2 |
|
exit 1 |
|
fi |
|
|
|
GLOB="$HISTORY_DIR/*.jsonl" |
|
|
|
# Common DuckDB flags |
|
DUCK="duckdb -markdown" |
|
|
|
# Base CTE: parse raw JSONL lines into structured fields |
|
# Using read_csv with null delimiter treats each line as a single field |
|
BASE_CTE=" |
|
WITH raw AS ( |
|
SELECT |
|
json_extract_string(column0, '\$.type') as msg_type, |
|
json_extract_string(column0, '\$.sessionId') as session_id, |
|
json_extract_string(column0, '\$.timestamp') as ts, |
|
json_extract_string(column0, '\$.gitBranch') as branch, |
|
json_extract_string(column0, '\$.message.role') as role, |
|
json_extract_string(column0, '\$.message.content') as content |
|
FROM read_csv('$GLOB', |
|
delim=chr(0), header=false, ignore_errors=true, max_line_size=10000000) |
|
), |
|
-- Filter to real text messages (not tool results which start with '[') |
|
msgs AS ( |
|
SELECT * FROM raw |
|
WHERE content IS NOT NULL |
|
AND length(content) > 2 |
|
AND left(content, 1) != '[' |
|
AND left(content, 1) != '{' |
|
) |
|
" |
|
|
|
cmd="${1:-help}" |
|
shift || true |
|
|
|
case "$cmd" in |
|
search) |
|
if [ $# -eq 0 ]; then |
|
echo "Usage: recall-search search <keywords>" >&2 |
|
exit 1 |
|
fi |
|
# Build ILIKE conditions for each keyword |
|
KEYWORDS="$*" |
|
WHERE_CLAUSES="" |
|
for word in $KEYWORDS; do |
|
if [ -n "$WHERE_CLAUSES" ]; then |
|
WHERE_CLAUSES="$WHERE_CLAUSES OR" |
|
fi |
|
WHERE_CLAUSES="$WHERE_CLAUSES content ILIKE '%${word}%'" |
|
done |
|
|
|
$DUCK -c " |
|
$BASE_CTE |
|
, matches AS ( |
|
SELECT |
|
session_id, |
|
ts, |
|
branch, |
|
content, |
|
role |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
AND ($WHERE_CLAUSES) |
|
), |
|
scored AS ( |
|
SELECT |
|
session_id, |
|
branch, |
|
min(ts) as first_ts, |
|
max(ts) as last_ts, |
|
count(*) FILTER (WHERE role = 'user') as user_hits, |
|
count(*) as total_hits, |
|
first(content ORDER BY ts) FILTER (WHERE role = 'user') as first_user_msg |
|
FROM matches |
|
GROUP BY session_id, branch |
|
) |
|
SELECT |
|
left(session_id, 8) as session, |
|
strftime(first_ts::timestamp, '%Y-%m-%d %H:%M') as started, |
|
branch, |
|
total_hits as hits, |
|
left(first_user_msg, 120) as first_message |
|
FROM scored |
|
ORDER BY total_hits DESC, last_ts DESC |
|
LIMIT $LIMIT |
|
" |
|
;; |
|
|
|
sessions) |
|
$DUCK -c " |
|
$BASE_CTE |
|
, session_stats AS ( |
|
SELECT |
|
session_id, |
|
branch, |
|
min(ts) as first_ts, |
|
max(ts) as last_ts, |
|
count(*) FILTER (WHERE msg_type = 'user' AND role = 'user') as user_msgs, |
|
count(*) FILTER (WHERE msg_type = 'assistant') as asst_msgs, |
|
first(content ORDER BY ts) FILTER (WHERE msg_type = 'user' AND role = 'user') as first_msg |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
GROUP BY session_id, branch |
|
) |
|
SELECT |
|
left(session_id, 8) as session, |
|
strftime(first_ts::timestamp, '%Y-%m-%d %H:%M') as started, |
|
branch, |
|
user_msgs as msgs, |
|
left(first_msg, 100) as first_message |
|
FROM session_stats |
|
WHERE user_msgs > 0 |
|
ORDER BY last_ts DESC |
|
LIMIT $LIMIT |
|
" |
|
;; |
|
|
|
spine) |
|
if [ $# -eq 0 ]; then |
|
echo "Usage: recall-search spine <session-id-prefix>" >&2 |
|
exit 1 |
|
fi |
|
SESSION_PREFIX="$1" |
|
$DUCK -c " |
|
$BASE_CTE |
|
SELECT |
|
role, |
|
ts, |
|
left(content, 500) as content |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
AND session_id LIKE '${SESSION_PREFIX}%' |
|
ORDER BY ts ASC |
|
" |
|
;; |
|
|
|
context) |
|
if [ $# -eq 0 ]; then |
|
echo "Usage: recall-search context <session-id-prefix>" >&2 |
|
exit 1 |
|
fi |
|
SESSION_PREFIX="$1" |
|
$DUCK -c " |
|
$BASE_CTE |
|
, session_msgs AS ( |
|
SELECT |
|
role, |
|
ts, |
|
content, |
|
row_number() OVER (ORDER BY ts ASC) as rn, |
|
count(*) OVER () as total |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
AND session_id LIKE '${SESSION_PREFIX}%' |
|
) |
|
SELECT |
|
role, |
|
ts, |
|
left(content, 500) as content |
|
FROM session_msgs |
|
WHERE rn <= 10 OR rn > total - 20 |
|
ORDER BY ts ASC |
|
" |
|
;; |
|
|
|
topics) |
|
$DUCK -c " |
|
$BASE_CTE |
|
, session_info AS ( |
|
SELECT |
|
session_id, |
|
branch, |
|
min(ts) as first_ts, |
|
max(ts) as last_ts, |
|
count(*) FILTER (WHERE msg_type = 'user' AND role = 'user') as user_msgs, |
|
sum(length(content)) FILTER (WHERE msg_type = 'user' AND role = 'user') as user_chars, |
|
first(content ORDER BY ts) FILTER (WHERE msg_type = 'user' AND role = 'user') as first_msg |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
GROUP BY session_id, branch |
|
) |
|
SELECT |
|
left(session_id, 8) as session, |
|
strftime(first_ts::timestamp, '%Y-%m-%d') as date, |
|
user_msgs as msgs, |
|
printf('%.0f', extract(epoch FROM last_ts::timestamp - first_ts::timestamp) / 60.0) || 'min' as duration, |
|
left(first_msg, 100) as topic |
|
FROM session_info |
|
WHERE user_msgs > 2 |
|
AND left(first_msg, 1) != '<' |
|
ORDER BY first_ts DESC |
|
LIMIT $LIMIT |
|
" |
|
;; |
|
|
|
stats) |
|
echo "## Project Stats" |
|
echo "" |
|
$DUCK -c " |
|
$BASE_CTE |
|
, usage AS ( |
|
SELECT |
|
json_extract_string(column0, '\$.message.model') as model, |
|
CAST(json_extract(column0, '\$.message.usage.output_tokens') AS BIGINT) as out_tok, |
|
CAST(json_extract(column0, '\$.message.usage.cache_read_input_tokens') AS BIGINT) as cache_read, |
|
CAST(json_extract(column0, '\$.message.usage.cache_creation_input_tokens') AS BIGINT) as cache_write |
|
FROM read_csv('$GLOB', |
|
delim=chr(0), header=false, ignore_errors=true, max_line_size=10000000) |
|
WHERE json_extract_string(column0, '\$.type') = 'assistant' |
|
AND json_extract_string(column0, '\$.message.model') IS NOT NULL |
|
) |
|
SELECT |
|
model, |
|
count(*) as turns, |
|
printf('%.1fM', sum(out_tok) / 1e6) as output_tokens, |
|
printf('%.1fB', sum(cache_read) / 1e9) as cache_read, |
|
printf('%.1fM', sum(cache_write) / 1e6) as cache_write |
|
FROM usage |
|
GROUP BY model |
|
ORDER BY count(*) DESC |
|
" |
|
echo "" |
|
echo "### Daily Activity" |
|
echo "" |
|
$DUCK -c " |
|
WITH raw_counts AS ( |
|
SELECT |
|
json_extract_string(column0, '\$.type') as msg_type, |
|
json_extract_string(column0, '\$.sessionId') as session_id, |
|
json_extract_string(column0, '\$.timestamp') as ts |
|
FROM read_csv('$GLOB', |
|
delim=chr(0), header=false, ignore_errors=true, max_line_size=10000000) |
|
WHERE json_extract_string(column0, '\$.type') IN ('user', 'assistant') |
|
) |
|
SELECT |
|
strftime(ts::TIMESTAMP, '%Y-%m-%d') as day, |
|
count(DISTINCT session_id) as sessions, |
|
count(*) FILTER (WHERE msg_type = 'user') as user_msgs, |
|
count(*) FILTER (WHERE msg_type = 'assistant') as asst_turns |
|
FROM raw_counts |
|
GROUP BY day |
|
ORDER BY day |
|
" |
|
echo "" |
|
echo "### Session Duration Distribution" |
|
echo "" |
|
$DUCK -c " |
|
$BASE_CTE |
|
, bounds AS ( |
|
SELECT |
|
session_id, |
|
min(ts::TIMESTAMP) as started, |
|
max(ts::TIMESTAMP) as ended, |
|
count(*) FILTER (WHERE msg_type = 'user') as user_msgs |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
GROUP BY session_id |
|
) |
|
SELECT |
|
CASE |
|
WHEN extract(epoch FROM ended - started) / 3600 < 0.5 THEN '<30min' |
|
WHEN extract(epoch FROM ended - started) / 3600 < 1 THEN '30min-1hr' |
|
WHEN extract(epoch FROM ended - started) / 3600 < 2 THEN '1-2hr' |
|
WHEN extract(epoch FROM ended - started) / 3600 < 4 THEN '2-4hr' |
|
ELSE '4hr+' |
|
END as duration, |
|
count(*) as sessions, |
|
avg(user_msgs)::INT as avg_msgs |
|
FROM bounds |
|
WHERE user_msgs > 0 |
|
GROUP BY duration |
|
ORDER BY min(extract(epoch FROM ended - started)) |
|
" |
|
;; |
|
|
|
impact) |
|
# Find the most impactful sessions by engagement (user messages x content length) |
|
COUNT="${1:-15}" |
|
$DUCK -c " |
|
$BASE_CTE |
|
, session_info AS ( |
|
SELECT |
|
session_id, |
|
branch, |
|
min(ts) as first_ts, |
|
max(ts) as last_ts, |
|
count(*) FILTER (WHERE msg_type = 'user' AND role = 'user') as user_msgs, |
|
sum(length(content)) FILTER (WHERE msg_type = 'user' AND role = 'user') as user_chars, |
|
first(content ORDER BY ts) FILTER (WHERE msg_type = 'user' AND role = 'user') as first_msg |
|
FROM msgs |
|
WHERE msg_type IN ('user', 'assistant') |
|
GROUP BY session_id, branch |
|
) |
|
SELECT |
|
left(session_id, 8) as session, |
|
strftime(first_ts::timestamp, '%Y-%m-%d') as date, |
|
user_msgs as msgs, |
|
printf('%.0f', user_chars / 1000.0) || 'k' as chars, |
|
printf('%.1fhr', extract(epoch FROM last_ts::timestamp - first_ts::timestamp) / 3600.0) as duration, |
|
left(first_msg, 90) as topic |
|
FROM session_info |
|
WHERE user_msgs > 3 |
|
AND first_msg IS NOT NULL |
|
AND left(first_msg, 1) != '<' |
|
ORDER BY user_msgs DESC |
|
LIMIT $COUNT |
|
" |
|
;; |
|
|
|
help|*) |
|
echo "recall-search — Search Claude Code conversation history" |
|
echo "" |
|
echo "Usage:" |
|
echo " search <keywords> Search user+assistant messages for keywords" |
|
echo " sessions List all sessions (most recent first)" |
|
echo " spine <session-id> Extract conversation spine" |
|
echo " context <session-id> First 10 + last 20 messages (compact)" |
|
echo " topics Extract topics from all sessions" |
|
echo " stats Project-level usage dashboard" |
|
echo " impact [N] Most impactful sessions by engagement" |
|
echo "" |
|
echo "Project: $HISTORY_DIR" |
|
echo "Files: $FILE_COUNT sessions" |
|
;; |
|
esac |