Skip to content

Instantly share code, notes, and snippets.

@darinkishore
Created January 20, 2026 02:16
Show Gist options
  • Select an option

  • Save darinkishore/3ca3ff1990f2415c6bfe29b6bd017df6 to your computer and use it in GitHub Desktop.

Select an option

Save darinkishore/3ca3ff1990f2415c6bfe29b6bd017df6 to your computer and use it in GitHub Desktop.
dspy rlm intro gist
import json
import os
from datetime import datetime
from pathlib import Path
import dspy
class AnalyzeAndUnderstand(dspy.Signature):
    """
    Given a dictionary of files and a query, understand and thoroughly understand the files in order to answer the query.
    Be proactive and make sure that no information is missing or omitted from the final answer.
    """

    # NOTE(review): the docstring above and every `desc` below are presumably
    # consumed by dspy as prompt text, so they are runtime strings — edit them
    # only when a prompt change is intended.

    # --- inputs ---
    # Callers in this file pass the dict built by load_folder()
    # (relative file path -> file text) as `directory`.
    directory: dict = dspy.InputField(
        desc='A folder on the filesystem. A high quality set of of jujutsu vcs blog posts, official documentation, and books.'
    )
    query: str = dspy.InputField(
        desc='The question to answer.',
    )

    # --- outputs ---
    answer: str = dspy.OutputField(desc='Comprehensive answer to the query based on exploration of the files in the directory')
    snippets: list[str] = dspy.OutputField(
        desc='Relevant text snippets',
    )
    file_refs: list[str] = dspy.OutputField(
        desc='Referenced file paths',
    )
# Language model used by this script: constructed here and registered with
# dspy through dspy.configure() right below.
# NOTE: os.environ[...] raises KeyError at import time when CEREBRAS_API_KEY
# is not set.
# NOTE(review): disable_reasoning / clear_thinking look like provider-specific
# options forwarded to the Cerebras endpoint — confirm against provider docs.
lm = dspy.LM(
    'cerebras/zai-glm-4.7',
    api_key=os.environ['CEREBRAS_API_KEY'],
    api_base='https://api.cerebras.ai/v1',
    temperature=1.0,
    top_p=0.95,
    max_tokens=40000,
    disable_reasoning=False,
    clear_thinking=False,
)
dspy.configure(lm=lm)
# Filesystem locations; all are relative paths, so they resolve against the
# current working directory.
# RUNS_DIR: run_question() writes one JSON file per run under RUNS_DIR/<qid>/.
RUNS_DIR = Path('runs')
# QUESTIONS_FILE: JSON question catalogue read by load_questions().
QUESTIONS_FILE = Path('questions.json')
# DATA_DIR: directory that run_question() loads through load_folder().
DATA_DIR = Path('data')
def load_folder(folder_path: Path | str, extensions: list[str] = None) -> dict:
"""Load folder into nested dict. Keys are relative paths, values are file contents."""
extensions = extensions or ['.md', '.txt', '.py']
root = Path(folder_path)
tree = {}
for path in root.rglob('*'):
if path.is_file() and path.suffix in extensions:
rel_path = str(path.relative_to(root))
tree[rel_path] = path.read_text(errors='ignore')
return tree
def investigate(file_tree: dict, query: str, max_iterations: int = 30):
    """Answer one query over a file tree with a dspy.RLM module.

    Builds a ``dspy.RLM`` around the ``AnalyzeAndUnderstand`` signature
    (capped at ``max_iterations`` iterations and 60 LLM calls, verbose
    output enabled) and invokes it once.

    Args:
        file_tree: Passed to the module as its ``directory`` input.
        query: Passed to the module as its ``query`` input.
        max_iterations: Forwarded to ``dspy.RLM`` as ``max_iterations``.

    Returns:
        A plain dict with the ``answer``, ``snippets``, ``file_refs`` and
        ``trajectory`` attributes of the module's result.
    """
    explorer = dspy.RLM(
        AnalyzeAndUnderstand,
        max_iterations=max_iterations,
        max_llm_calls=60,
        verbose=True,
    )
    prediction = explorer(directory=file_tree, query=query)
    # Copy the fields of interest off the result object into a plain dict.
    wanted = ('answer', 'snippets', 'file_refs', 'trajectory')
    return {field: getattr(prediction, field) for field in wanted}
def load_questions() -> dict:
    """Read and parse the question catalogue at ``QUESTIONS_FILE``.

    Returns:
        The parsed JSON document. The other functions in this file index it
        as ``data['questions']``: a list of categories, each holding a
        ``'category'`` label and its own ``'questions'`` list.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(QUESTIONS_FILE, encoding='utf-8') as f:
        return json.load(f)
def get_question(qid: str) -> dict | None:
    """Look up a single question by its id.

    Args:
        qid: Value compared against each question's ``'id'`` field.

    Returns:
        The first matching question's fields merged with the ``'category'``
        of the group it belongs to, or ``None`` when no question matches.
    """
    catalog = load_questions()
    # Lazily scan categories in file order; next() stops at the first hit.
    matches = (
        {'category': group['category'], **entry}
        for group in catalog['questions']
        for entry in group['questions']
        if entry['id'] == qid
    )
    return next(matches, None)
def list_questions() -> list[dict]:
    """Flatten the question catalogue into one list.

    Returns:
        One dict per question, in file order, containing the question's own
        fields plus the ``'category'`` of the group it came from.
    """
    catalog = load_questions()
    flattened = []
    for group in catalog['questions']:
        for entry in group['questions']:
            flattened.append({'category': group['category'], **entry})
    return flattened
def run_question(qid: str, max_iterations: int = 30) -> dict:
    """Run one catalogue question end to end and persist the result.

    Looks the question up by id, loads ``DATA_DIR`` with ``load_folder``,
    runs ``investigate`` on it, and writes the result as JSON to
    ``RUNS_DIR/<qid>/<timestamp>.json``.

    Args:
        qid: Id of the question to run.
        max_iterations: Forwarded to ``investigate``.

    Returns:
        The dict that was written to disk: question metadata, timestamp,
        answer, snippets, file references, trajectory and run config.

    Raises:
        ValueError: If no question with id ``qid`` exists.
    """
    q = get_question(qid)
    if q is None:
        raise ValueError(f'Question {qid} not found')

    tree = load_folder(DATA_DIR)
    print(f'Loaded {len(tree)} files')
    print(f'Running: {q["id"]} - {q["question"][:60]}...')
    result = investigate(tree, q['question'], max_iterations=max_iterations)

    # Take the clock reading once so the timestamp stored inside the JSON
    # and the one in the file name always refer to the same instant.
    finished_at = datetime.now()
    output = {
        'id': q['id'],
        'category': q['category'],
        'query': q['question'],
        'timestamp': finished_at.isoformat(),
        'answer': result['answer'],
        'snippets': result['snippets'],
        'file_refs': result['file_refs'],
        'trajectory': result['trajectory'],
        'config': {'model': 'cerebras/zai-glm-4.7', 'max_iterations': max_iterations},
    }

    out_dir = RUNS_DIR / qid
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f'{finished_at.strftime("%Y-%m-%dT%H-%M-%S")}.json'
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f'Saved to {out_file}')
    return output
if __name__ == '__main__':
    # Command-line entry point: the first argument is the question id to run.
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        # No id given: show usage plus the available questions, then exit
        # with status 0.
        print('Usage: uv run investigate.py <question_id>')
        print('\nQuestions:')
        for item in list_questions():
            print(f' {item["id"]}: {item["question"][:60]}...')
        sys.exit(0)

    run_question(cli_args[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment