Created
January 20, 2026 02:16
-
-
Save darinkishore/3ca3ff1990f2415c6bfe29b6bd017df6 to your computer and use it in GitHub Desktop.
dspy rlm intro gist
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import os | |
| from datetime import datetime | |
| from pathlib import Path | |
| import dspy | |
class AnalyzeAndUnderstand(dspy.Signature):
    """
    Given a dictionary of files and a query, read and thoroughly understand the files in order to answer the query.
    Be proactive and make sure that no information is missing or omitted from the final answer.
    """

    # NOTE: DSPy uses the signature docstring as the task instructions and the
    # `desc` strings as field descriptions, so all of this text is prompt
    # content, not just documentation. Edit it with care.

    # Inputs: the corpus (see `load_folder` for how it is built) and the question.
    directory: dict = dspy.InputField(
        desc='A folder on the filesystem. A high quality set of jujutsu vcs blog posts, official documentation, and books.'
    )
    query: str = dspy.InputField(desc='The question to answer.')

    # Outputs: the answer plus the supporting evidence.
    answer: str = dspy.OutputField(
        desc='Comprehensive answer to the query based on exploration of the files in the directory'
    )
    snippets: list[str] = dspy.OutputField(desc='Relevant text snippets')
    file_refs: list[str] = dspy.OutputField(desc='Referenced file paths')
# Filesystem layout, relative to the current working directory.
DATA_DIR = Path('data')  # corpus handed to the model
QUESTIONS_FILE = Path('questions.json')  # question catalogue
RUNS_DIR = Path('runs')  # one sub-folder of saved results per question id

# Language model used for every call in this script.
# NOTE: reading os.environ[...] raises KeyError at import time when
# CEREBRAS_API_KEY is not set.
lm = dspy.LM(
    'cerebras/zai-glm-4.7',
    api_base='https://api.cerebras.ai/v1',
    api_key=os.environ['CEREBRAS_API_KEY'],
    # Sampling settings.
    temperature=1.0,
    top_p=0.95,
    max_tokens=40000,
    # NOTE(review): presumably provider-specific options forwarded to the
    # Cerebras endpoint -- confirm against the provider documentation.
    disable_reasoning=False,
    clear_thinking=False,
)

# Make `lm` the default model for all DSPy modules created below.
dspy.configure(lm=lm)
| def load_folder(folder_path: Path | str, extensions: list[str] = None) -> dict: | |
| """Load folder into nested dict. Keys are relative paths, values are file contents.""" | |
| extensions = extensions or ['.md', '.txt', '.py'] | |
| root = Path(folder_path) | |
| tree = {} | |
| for path in root.rglob('*'): | |
| if path.is_file() and path.suffix in extensions: | |
| rel_path = str(path.relative_to(root)) | |
| tree[rel_path] = path.read_text(errors='ignore') | |
| return tree | |
def investigate(
    file_tree: dict,
    query: str,
    max_iterations: int = 30,
    max_llm_calls: int = 60,
    verbose: bool = True,
) -> dict:
    """Answer ``query`` by letting a ``dspy.RLM`` explore ``file_tree``.

    Args:
        file_tree: Mapping of relative file path to file content; passed to
            the model as the ``directory`` input.
        query: The question to answer.
        max_iterations: Forwarded to ``dspy.RLM`` as ``max_iterations``.
        max_llm_calls: Forwarded to ``dspy.RLM`` as ``max_llm_calls``
            (previously hard-coded to 60).
        verbose: Forwarded to ``dspy.RLM`` as ``verbose`` (previously
            hard-coded to True).

    Returns:
        A dict with the keys ``answer``, ``snippets``, ``file_refs`` and
        ``trajectory``, copied from the attributes of the RLM result.
    """
    rlm = dspy.RLM(
        AnalyzeAndUnderstand,
        max_iterations=max_iterations,
        max_llm_calls=max_llm_calls,
        verbose=verbose,
    )
    result = rlm(directory=file_tree, query=query)
    # Return a plain dict so callers do not depend on the prediction object.
    return {
        'answer': result.answer,
        'snippets': result.snippets,
        'file_refs': result.file_refs,
        'trajectory': result.trajectory,
    }
def load_questions() -> dict:
    """Read and parse the question catalogue at ``QUESTIONS_FILE``.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If ``QUESTIONS_FILE`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Open as UTF-8 explicitly: the platform default encoding (e.g. cp1252
    # on Windows) would garble or reject non-ASCII question text.
    with open(QUESTIONS_FILE, encoding='utf-8') as f:
        return json.load(f)
def get_question(qid: str) -> dict | None:
    """Look up a single question by its id.

    Returns the first question whose ``'id'`` equals ``qid``, as a dict
    holding the owning category under ``'category'`` followed by the
    question's own fields, or ``None`` when no question matches.
    """
    catalog = load_questions()
    # Lazy generator: the scan stops at the first match, in file order.
    matches = (
        {'category': group['category'], **entry}
        for group in catalog['questions']
        for entry in group['questions']
        if entry['id'] == qid
    )
    return next(matches, None)
def list_questions() -> list[dict]:
    """Return every question from the catalogue as one flat list.

    Each item holds the owning category under ``'category'`` followed by
    the question's own fields; file order is preserved.
    """
    catalog = load_questions()
    flattened = []
    for group in catalog['questions']:
        flattened.extend(
            {'category': group['category'], **entry}
            for entry in group['questions']
        )
    return flattened
def run_question(qid: str, max_iterations: int = 30) -> dict:
    """Run one catalogue question end to end and save the result as JSON.

    Loads the corpus from ``DATA_DIR``, runs ``investigate`` on the
    question's text and writes the result to
    ``RUNS_DIR/<qid>/<timestamp>.json``.

    Args:
        qid: Id of the question to run.
        max_iterations: Iteration budget forwarded to ``investigate``.

    Returns:
        The dict that was written to disk.

    Raises:
        ValueError: If no question with id ``qid`` exists.
    """
    q = get_question(qid)
    if not q:
        raise ValueError(f'Question {qid} not found')

    tree = load_folder(DATA_DIR)
    print(f'Loaded {len(tree)} files')
    print(f'Running: {q["id"]} - {q["question"][:60]}...')

    result = investigate(tree, q['question'], max_iterations=max_iterations)

    # Take the clock reading once so the timestamp stored in the file and
    # the one in its filename always refer to the same instant.
    finished_at = datetime.now()
    output = {
        'id': q['id'],
        'category': q['category'],
        'query': q['question'],
        'timestamp': finished_at.isoformat(),
        'answer': result['answer'],
        'snippets': result['snippets'],
        'file_refs': result['file_refs'],
        'trajectory': result['trajectory'],
        'config': {'model': 'cerebras/zai-glm-4.7', 'max_iterations': max_iterations},
    }

    out_dir = RUNS_DIR / qid
    out_dir.mkdir(parents=True, exist_ok=True)
    # Colons are avoided in the filename so it is valid on every platform.
    out_file = out_dir / f'{finished_at.strftime("%Y-%m-%dT%H-%M-%S")}.json'
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f'Saved to {out_file}')
    return output
if __name__ == '__main__':
    import sys

    # CLI: the first positional argument is the id of the question to run.
    cli_args = sys.argv[1:]
    if cli_args:
        run_question(cli_args[0])
    else:
        # No id given: show usage plus the available questions, then exit
        # successfully.
        print('Usage: uv run investigate.py <question_id>')
        print('\nQuestions:')
        for entry in list_questions():
            print(f' {entry["id"]}: {entry["question"][:60]}...')
        sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment