Last active
February 15, 2025 10:11
-
-
Save RasmusRynell/2bdc809b4a8575f1e11bf6bca347c6a3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| This script generates documentation for Python code by statically analyzing source files. | |
| It extracts metadata such as docstrings, type annotations, imports, dependencies, and more. | |
| The output can be used by an AI to understand a library or codebase, and includes options | |
| to filter by type (module, class, function), name patterns, and whether to include private members. | |
| """ | |
| import ast, os, argparse, importlib, re | |
| from typing import List, Dict, Any, Optional | |
def is_private(name: str) -> bool:
    """Report whether ``name`` follows the leading-underscore private convention.

    Dunder names such as ``__init__`` both start and end with a double
    underscore and are deliberately reported as public.
    """
    is_dunder = name.startswith('__') and name.endswith('__')
    return name.startswith('_') and not is_dunder
def format_annotation(node: ast.AST) -> str:
    """Render a type-annotation AST node as source-like text.

    Args:
        node: The annotation expression taken from a function argument,
            return annotation, or annotated assignment.

    Returns:
        A readable string such as ``Dict[str, Any]`` or ``int | None``.
        String constants (forward references) are rendered without quotes.
    """
    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Constant):
        # str(Ellipsis) is "Ellipsis"; annotations spell it "...".
        if node.value is Ellipsis:
            return '...'
        return str(node.value)
    if isinstance(node, ast.Subscript):
        inner = node.slice
        if isinstance(inner, ast.Tuple) and inner.elts:
            # Multiple subscript arguments are stored as a Tuple; render them
            # without the tuple parentheses so Dict[str, Any] round-trips.
            rendered = ', '.join(format_annotation(elt) for elt in inner.elts)
        else:
            rendered = format_annotation(inner)
        return f"{format_annotation(node.value)}[{rendered}]"
    if isinstance(node, ast.Attribute):
        return f"{format_annotation(node.value)}.{node.attr}"
    if isinstance(node, ast.Tuple):
        return f"({', '.join(format_annotation(elt) for elt in node.elts)})"
    if isinstance(node, ast.List):
        return f"[{', '.join(format_annotation(elt) for elt in node.elts)}]"
    if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
        return f"{format_annotation(node.left)} | {format_annotation(node.right)}"
    # Anything else (unary minus in Literal[-1], calls, ...) is rendered from
    # source form instead of the useless "<ast.X object at 0x...>" repr.
    return ast.unparse(node)
def extract_docstring(node: ast.AST) -> Optional[str]:
    """Return the docstring of a module, class or function node.

    Any other node type yields ``None``, as does a documentable node that
    has no docstring.
    """
    documentable = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)
    return ast.get_docstring(node) if isinstance(node, documentable) else None
def extract_imports(tree: ast.AST) -> List[Dict[str, Any]]:
    """Collect every ``import`` and ``from ... import`` statement in ``tree``.

    Args:
        tree: Any AST node; all nested nodes are visited via ``ast.walk``.

    Returns:
        One dict per imported name. Plain imports have the keys ``type``
        (``'import'``), ``name`` and ``asname``; from-imports have ``type``
        (``'from_import'``), ``module``, ``name`` and ``asname``. For relative
        imports ``module`` keeps its leading dots (``from ..pkg import x``
        gives ``'..pkg'``, ``from . import x`` gives ``'.'``).
    """
    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.append({'type': 'import', 'name': alias.name, 'asname': alias.asname})
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; node.level counts the
            # leading dots, which would otherwise be lost.
            module = '.' * (node.level or 0) + (node.module or '')
            for alias in node.names:
                imports.append({
                    'type': 'from_import',
                    'module': module,
                    'name': alias.name,
                    'asname': alias.asname,
                })
    return imports
| def extract_exceptions(node: ast.FunctionDef | ast.AsyncFunctionDef) -> List[str]: | |
| exceptions = [] | |
| for n in ast.walk(node): | |
| if isinstance(n, ast.Raise): | |
| if isinstance(n.exc, ast.Call) and isinstance(n.exc.func, ast.Name): | |
| exceptions.append(n.exc.func.id) | |
| elif isinstance(n.exc, ast.Name): | |
| exceptions.append(n.exc.id) | |
| return exceptions | |
def extract_code_examples(docstring: Optional[str]) -> List[str]:
    """Pull doctest-style examples out of a docstring.

    An example starts at a line whose stripped text begins with ``'>>> '``
    or ``'... '`` and continues over every following non-blank line; a blank
    line ends it. Lines are kept exactly as they appear in the docstring and
    joined with newlines.

    Returns an empty list for a missing or empty docstring.
    """
    if not docstring:
        return []

    prompts = ('>>> ', '... ')
    collected: List[str] = []
    block: List[str] = []  # non-empty exactly while an example is being collected

    for raw in docstring.split('\n'):
        stripped = raw.strip()
        if stripped.startswith(prompts) or (block and stripped):
            block.append(raw)
        elif block:
            # Blank line after example content closes the example.
            collected.append('\n'.join(block))
            block = []

    if block:
        collected.append('\n'.join(block))
    return collected
def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> Dict[str, Any]:
    """Describe a function or method definition as a plain dictionary.

    Args:
        node: The (async) function definition to describe.

    Returns:
        A dict with the keys ``name``, ``docstring``, ``is_async``,
        ``decorators`` (source text, without ``@``), ``parameters``,
        ``return_type``, ``raises`` and ``examples``.

        ``parameters`` lists every parameter in signature order:
        positional-only, regular, ``*args``, keyword-only and ``**kwargs``.
        Each entry has ``name`` (prefixed with ``*`` / ``**`` for variadic
        parameters), ``annotation`` (text or ``None``), ``has_default``,
        ``kind`` and, when ``has_default`` is true, ``default`` (source text).
    """
    doc = extract_docstring(node)
    signature = node.args

    def describe(arg: ast.arg, kind: str, prefix: str = '') -> Dict[str, Any]:
        return {
            'name': f"{prefix}{arg.arg}",
            'annotation': format_annotation(arg.annotation) if arg.annotation else None,
            'has_default': False,
            'kind': kind,
        }

    def attach_default(param: Dict[str, Any], default: ast.AST) -> None:
        param['has_default'] = True
        param['default'] = ast.unparse(default)

    # ``defaults`` is right-aligned against positional-only + regular
    # parameters combined, so both groups must be considered together.
    positional = [describe(a, 'positional_only') for a in signature.posonlyargs]
    positional += [describe(a, 'positional_or_keyword') for a in signature.args]
    first_default = len(positional) - len(signature.defaults)
    for param, default in zip(positional[first_default:], signature.defaults):
        attach_default(param, default)

    parameters = positional
    if signature.vararg:
        parameters.append(describe(signature.vararg, 'var_positional', '*'))
    # ``kw_defaults`` is parallel to ``kwonlyargs``; None marks "no default".
    for arg, default in zip(signature.kwonlyargs, signature.kw_defaults):
        param = describe(arg, 'keyword_only')
        if default is not None:
            attach_default(param, default)
        parameters.append(param)
    if signature.kwarg:
        parameters.append(describe(signature.kwarg, 'var_keyword', '**'))

    return {
        'name': node.name,
        'docstring': doc,
        'is_async': isinstance(node, ast.AsyncFunctionDef),
        'decorators': [ast.unparse(d) for d in node.decorator_list],
        'parameters': parameters,
        'return_type': format_annotation(node.returns) if node.returns else None,
        'raises': extract_exceptions(node),
        'examples': extract_code_examples(doc),
    }
def extract_instance_attributes(node: ast.ClassDef) -> List[Dict[str, Any]]:
    """
    Extract instance attributes assigned to ``self`` in the ``__init__`` method.

    Both plain assignments (``self.x = 1``) and annotated assignments
    (``self.x: int = 1`` or the value-less ``self.x: int``) are recognised.

    Args:
        node: The class definition to inspect.

    Returns:
        One dict per assignment target with the keys ``name``, ``has_value``
        and ``value`` (source text of the assigned expression, or ``None``
        for an annotation without a value).
    """
    attributes = []
    for item in node.body:
        if not (isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)) and item.name == '__init__'):
            continue
        for stmt in ast.walk(item):
            if isinstance(stmt, ast.Assign):
                targets = stmt.targets
            elif isinstance(stmt, ast.AnnAssign):
                # AnnAssign has a single target and an optional value.
                targets = [stmt.target]
            else:
                continue
            for target in targets:
                is_self_attr = (
                    isinstance(target, ast.Attribute)
                    and isinstance(target.value, ast.Name)
                    and target.value.id == 'self'
                )
                if is_self_attr:
                    attributes.append({
                        'name': target.attr,
                        'has_value': stmt.value is not None,
                        'value': ast.unparse(stmt.value) if stmt.value is not None else None,
                    })
    return attributes
def extract_class_info(node: ast.ClassDef) -> Dict[str, Any]:
    """Describe a class definition as a plain dictionary.

    Args:
        node: The class definition to describe.

    Returns:
        A dict with the keys ``name``, ``docstring``, ``bases`` and
        ``decorators`` (source text), ``methods`` and ``properties`` (function
        info dicts), ``class_attributes`` (dicts with ``name``, ``type`` and
        ``has_value``) and ``instance_attributes``.

        A method counts as a property when one of its decorators is
        ``property`` or ``cached_property``; property setters and deleters
        remain in ``methods``.
    """
    # Decorator text comes from ast.unparse and therefore has no leading "@".
    property_decorators = ('property', 'cached_property', 'functools.cached_property')

    info = {
        'name': node.name,
        'docstring': extract_docstring(node),
        'bases': [ast.unparse(base) for base in node.bases],
        'decorators': [ast.unparse(d) for d in node.decorator_list],
        'methods': [],
        'properties': [],
        'class_attributes': [],
        'instance_attributes': extract_instance_attributes(node)
    }
    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            method_info = extract_function_info(item)
            if any(d in property_decorators for d in method_info['decorators']):
                info['properties'].append(method_info)
            else:
                info['methods'].append(method_info)
        elif isinstance(item, ast.AnnAssign):
            # Non-name targets (attributes, subscripts) are shown as source
            # text rather than as an AST object repr.
            name = item.target.id if isinstance(item.target, ast.Name) else ast.unparse(item.target)
            info['class_attributes'].append({
                'name': name,
                'type': format_annotation(item.annotation) if item.annotation else None,
                'has_value': item.value is not None
            })
        elif isinstance(item, ast.Assign):
            for target in item.targets:
                if isinstance(target, ast.Name):
                    info['class_attributes'].append({'name': target.id, 'type': None, 'has_value': True})
    return info
def format_function_signature(func_info: Dict[str, Any]) -> str:
    """Build a ``def`` line (preceded by decorator lines) from a function info dict.

    Reads the ``is_async``, ``parameters``, ``return_type``, ``decorators``
    and ``name`` keys. Each decorator is written on its own ``@`` line above
    the signature; parameters are rendered as ``name: annotation = default``
    with the optional parts omitted when absent.
    """
    keyword = 'async def' if func_info['is_async'] else 'def'

    rendered_params = []
    for param in func_info['parameters']:
        annotation = f": {param['annotation']}" if param['annotation'] else ''
        default = f" = {param['default']}" if param.get('has_default') else ''
        rendered_params.append(f"{param['name']}{annotation}{default}")
    param_list = ', '.join(rendered_params)

    return_type = func_info['return_type']
    arrow = f" -> {return_type}" if return_type else ""

    # Every decorator line, including the last, ends with a newline.
    decorator_lines = ''.join(f"@{d}\n" for d in func_info['decorators'])

    return f"{decorator_lines}{keyword} {func_info['name']}({param_list}){arrow}"
# Distribution name (optionally followed by [extras]), then whatever follows.
_REQUIREMENT_PATTERN = re.compile(r'^([A-Za-z0-9][A-Za-z0-9._-]*(?:\[[^\]]*\])?)\s*(.*)$')


def _split_requirement(requirement: str) -> Optional[tuple[str, str]]:
    """Split a requirement string into ``(name, version_specifier)``, or None if unparseable."""
    # Environment markers ("; python_version < '3.9'") are not part of the version.
    requirement = requirement.split(';', 1)[0].strip()
    match = _REQUIREMENT_PATTERN.match(requirement)
    if not match:
        return None
    name, specifier = match.groups()
    specifier = specifier.strip()
    # Anything that is not a comparison, a direct reference ("@ url") or a
    # parenthesised specifier is not a requirement line we understand.
    if specifier and specifier[0] not in '=<>!~@(':
        return None
    return name, specifier


def extract_dependencies() -> Dict[str, str]:
    """Read project dependencies from the current working directory.

    Looks at ``requirements.txt`` and at ``[project].dependencies`` in
    ``pyproject.toml`` (parsed with ``tomllib`` when available, otherwise
    ``tomli``; skipped when neither can be imported). Entries from
    ``pyproject.toml`` override same-named entries from ``requirements.txt``.

    Returns:
        Mapping of package name to version text. An exact pin in
        ``requirements.txt`` (``pkg==1.0``) is reported as the bare version
        (``'1.0'``); other specifiers are kept verbatim (``'>=2.0'``,
        ``'~=3.1'``); a requirement without a specifier maps to ``'*'``.
    """
    dependencies: Dict[str, str] = {}

    if os.path.exists('requirements.txt'):
        with open('requirements.txt', encoding='utf-8') as f:
            for raw in f:
                line = raw.strip()
                # Skip blanks, comments and pip options such as "-r other.txt".
                if not line or line.startswith(('#', '-')):
                    continue
                # Drop trailing "  # comment" text.
                line = re.split(r'\s+#', line, maxsplit=1)[0]
                parsed = _split_requirement(line)
                if parsed is None:
                    continue
                name, specifier = parsed
                if specifier.startswith('==') and not specifier.startswith('==='):
                    specifier = specifier[2:].strip()
                dependencies[name] = specifier or '*'

    if os.path.exists('pyproject.toml'):
        try:
            import tomllib as toml_reader  # standard library on Python 3.11+
        except ImportError:
            try:
                import tomli as toml_reader
            except ImportError:
                toml_reader = None
        if toml_reader is not None:
            with open('pyproject.toml', 'rb') as f:
                pyproject = toml_reader.load(f)
            for dep in pyproject.get('project', {}).get('dependencies', []):
                parsed = _split_requirement(dep)
                if parsed is None:
                    continue
                name, specifier = parsed
                dependencies[name] = specifier or '*'

    return dependencies
def parse_docstring(docstring: str) -> Dict[str, Any]:
    """Split a docstring into description, params, returns, raises and examples.

    A line that (case-insensitively, after stripping) starts with a known
    section header switches the active section; the header line itself is
    discarded. All other lines are stripped and filed under the active
    section:

    * ``description`` / ``examples``: non-empty lines are appended to a list.
    * ``params`` / ``raises``: lines shaped like ``name: text`` are stored in
      a dict; other lines are ignored.
    * ``returns``: the last non-empty line wins.
    """
    section_headers = (
        (('parameters:', 'args:', 'arguments:'), 'params'),
        (('returns:', 'return:'), 'returns'),
        (('raises:', 'exceptions:', 'throws:'), 'raises'),
        (('examples:',), 'examples'),
    )
    entry_pattern = re.compile(r'^(\w+)\s*:\s*(.+)$')

    parsed = {'description': [], 'params': {}, 'returns': None, 'raises': {}, 'examples': []}
    active = 'description'

    for raw in docstring.split('\n'):
        text = raw.strip()
        lowered = text.lower()

        new_section = next(
            (section for prefixes, section in section_headers if lowered.startswith(prefixes)),
            None,
        )
        if new_section is not None:
            active = new_section
            continue

        if active in ('params', 'raises'):
            found = entry_pattern.match(text)
            if found:
                key, body = found.groups()
                parsed[active][key] = body.strip()
        elif not text:
            continue
        elif active == 'returns':
            parsed['returns'] = text
        else:
            # 'description' and 'examples' both accumulate lines.
            parsed[active].append(text)

    return parsed
def generate_toc(info: List[Dict[str, Any]]) -> List[str]:
    """Build a Markdown table of contents for the extracted items.

    Args:
        info: Flat list of item dicts, each with ``type`` (``'module'``,
            ``'class'`` or ``'function'``) and ``name``. Classes and
            functions are expected to follow the module entry they belong to.

    Returns:
        Markdown lines: a heading, then one top-level bullet per module with
        the classes and functions that follow it nested underneath. Members
        are listed only under their own module, not under every module.
    """
    member_labels = {'class': 'Class', 'function': 'Function'}
    toc = ['# Table of Contents\n']
    for item in info:
        kind = item['type']
        if kind == 'module':
            toc.append(f"- [Module: {item['name']}](#{item['name'].lower()})")
        elif kind in member_labels:
            # Two-space indent so Markdown renders this as a nested bullet.
            toc.append(f"  - [{member_labels[kind]}: {item['name']}](#{item['name'].lower()})")
    return toc
def _visible_members(members, show_private: bool) -> List[Dict[str, Any]]:
    """Return the members whose names pass the private-name filter."""
    if not members:
        return []
    return [m for m in members if show_private or not is_private(m['name'])]


def _render_examples(heading: str, examples: List[str]) -> List[str]:
    """Render examples as fenced python blocks under ``heading`` (nothing if empty)."""
    if not examples:
        return []
    out = [heading]
    for example in examples:
        out += ["```python", example, "```"]
    return out


def _render_module(item: Dict[str, Any]) -> List[str]:
    """Render the heading, file path, version, description and imports of a module."""
    out = [f"# Module: {item['name']}", f"File: `{item['file_path']}`"]
    if 'version' in item:
        out.append(f"Version: {item['version']}")
    if item['docstring']:
        description = parse_docstring(item['docstring'])['description']
        if description:
            out.append(' '.join(description))
    if item['imports']:
        out.append("## Imports")
        for imp in item['imports']:
            alias = f" as {imp['asname']}" if imp['asname'] else ""
            if imp['type'] == 'import':
                out.append(f"- `import {imp['name']}{alias}`")
            else:
                out.append(f"- `from {imp['module']} import {imp['name']}{alias}`")
    return out


def _render_class(item: Dict[str, Any], show_private: bool) -> List[str]:
    """Render a class: heading, docstring, attributes, properties, methods, examples."""
    base_list = f"({', '.join(item['bases'])})" if item['bases'] else ""
    out = [f"## Class: {item['name']}{base_list}"]
    if item['docstring']:
        out.append(item['docstring'])

    class_attrs = _visible_members(item['class_attributes'], show_private)
    if class_attrs:
        out.append("### Class Attributes:")
        for attr in class_attrs:
            annotation = f": {attr['type']}" if attr['type'] else ""
            out.append(f"- {attr['name']}{annotation}")

    instance_attrs = _visible_members(item.get('instance_attributes'), show_private)
    if instance_attrs:
        out.append("### Instance Attributes:")
        for attr in instance_attrs:
            assigned = f" = {attr['value']}" if attr['has_value'] and attr['value'] else ""
            out.append(f"- {attr['name']}{assigned}")

    properties = _visible_members(item['properties'], show_private)
    if properties:
        out.append("### Properties:")
        for prop in properties:
            out.append(f"#### @property {prop['name']}")
            if prop['docstring']:
                out.append(prop['docstring'])

    methods = _visible_members(item['methods'], show_private)
    if methods:
        out.append("### Methods:")
        for method in methods:
            out.append(f"#### {format_function_signature(method)}")
            if method['docstring']:
                out.append(method['docstring'])

    # Examples come from the class docstring itself, not from its methods.
    out += _render_examples("#### Examples:", extract_code_examples(item['docstring']))
    return out


def _render_function(item: Dict[str, Any]) -> List[str]:
    """Render a module-level function: signature, docstring, raises, examples."""
    out = [f"## Function: {format_function_signature(item)}"]
    if item['docstring']:
        out.append(item['docstring'])
    if item['raises']:
        out.append("### Raises:")
        out += [f"- `{exc}`" for exc in item['raises']]
    out += _render_examples("### Examples:", item['examples'])
    return out


def format_documentation(info: List[Dict[str, Any]], args: argparse.Namespace) -> List[str]:
    """Turn extracted item dicts into the output lines of the documentation.

    Args:
        info: Flat list of module, class and function dicts.
        args: Parsed options; reads ``toc``, ``dependencies``,
            ``include_private``, ``pattern`` and ``type``.

    Returns:
        Markdown lines in order: optional table of contents, optional
        dependency list, then one section per item that passes the private,
        name-pattern and type filters. Empty strings are removed from the
        result.
    """
    lines: List[str] = []

    if args.toc:
        lines += generate_toc(info)
        lines.append('---')

    if args.dependencies:
        deps = extract_dependencies()
        if deps:
            lines.append('# Project Dependencies')
            lines += [f'- `{pkg}`: {ver}' for pkg, ver in deps.items()]
            lines.append('---')

    for item in info:
        item_name = item.get('name', '')
        if not args.include_private and is_private(item_name):
            continue
        if args.pattern and not re.search(args.pattern, item_name):
            continue
        if args.type and item['type'] != args.type:
            continue

        kind = item['type']
        if kind == 'module':
            lines += _render_module(item)
        elif kind == 'class':
            lines += _render_class(item, args.include_private)
        elif kind == 'function':
            lines += _render_function(item)
        lines.append('')

    return [line for line in lines if line]
def _static_module_version(tree: ast.Module) -> Any:
    """Return the literal value of the last top-level ``__version__`` assignment, or None."""
    version = None
    for stmt in tree.body:
        if isinstance(stmt, ast.Assign):
            targets, value = stmt.targets, stmt.value
        elif isinstance(stmt, ast.AnnAssign) and stmt.value is not None:
            targets, value = [stmt.target], stmt.value
        else:
            continue
        if any(isinstance(t, ast.Name) and t.id == '__version__' for t in targets):
            try:
                version = ast.literal_eval(value)
            except (ValueError, TypeError):
                # Computed at runtime (function call, attribute, ...): not
                # knowable without executing the module.
                continue
    return version


def extract_info_from_file(filepath: str) -> List[Dict[str, Any]]:
    """Parse one Python source file and describe its module, functions and classes.

    The file is only parsed, never imported or executed: ``__version__`` is
    picked up when it is assigned a literal at module level.

    Args:
        filepath: Path of the ``.py`` file to analyse.

    Returns:
        A list starting with the module dict (``type``, ``name``,
        ``docstring``, ``file_path``, ``imports`` and optionally ``version``)
        followed by one dict per top-level function and class in source
        order. An empty list is returned, after printing a message, when the
        file cannot be read or parsed.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            source = f.read()
        tree = ast.parse(source, filename=filepath)
    except SyntaxError:
        print(f"Syntax error in file: {filepath}")
        return []
    except (OSError, ValueError) as exc:
        # Unreadable file, undecodable bytes, or embedded null bytes.
        print(f"Could not read file: {filepath} ({exc})")
        return []

    module_name = os.path.splitext(os.path.basename(filepath))[0]
    module_info = {
        'type': 'module',
        'name': module_name,
        'docstring': extract_docstring(tree),
        'file_path': os.path.relpath(filepath),
        'imports': extract_imports(tree)
    }
    version = _static_module_version(tree)
    if version is not None:
        module_info['version'] = version

    info = [module_info]
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            func = extract_function_info(node)
            func['type'] = 'function'
            info.append(func)
        elif isinstance(node, ast.ClassDef):
            cls = extract_class_info(node)
            cls['type'] = 'class'
            info.append(cls)
    return info
def process_directory(directory: str, excluded_dirs: List[str]) -> List[Dict[str, Any]]:
    """Collect documentation info for every ``.py`` file under ``directory``.

    Directories and files are visited in sorted name order so the output is
    the same on every run and every filesystem.

    Args:
        directory: Root directory to walk.
        excluded_dirs: Directory names (not paths) that are skipped wherever
            they occur in the tree.

    Returns:
        The concatenated item lists of all analysed files.
    """
    excluded = set(excluded_dirs)
    all_info: List[Dict[str, Any]] = []
    for root, dirs, files in os.walk(directory):
        # Assigning in place prunes the walk and fixes the descent order.
        dirs[:] = sorted(d for d in dirs if d not in excluded)
        for filename in sorted(files):
            if filename.endswith(".py"):
                all_info.extend(extract_info_from_file(os.path.join(root, filename)))
    return all_info
def main():
    """Command-line entry point: parse options, analyse the directory, print the result.

    Exits with an argparse usage error when the directory does not exist or
    the ``--pattern`` value is not a valid regular expression.
    """
    parser = argparse.ArgumentParser(description='Generate Python code documentation')
    parser.add_argument('directory', help='Directory containing Python files to document')
    # NOTE(review): --format is accepted but nothing reads args.format; the
    # output is always Markdown.
    parser.add_argument('--format', '-f', choices=['text', 'markdown'], default='markdown',
                        help='Output format (default: markdown)')
    parser.add_argument('--include-private', '-p', action='store_true',
                        help='Include private members (default excludes them)')
    parser.add_argument('--toc', '-t', action='store_true', help='Include table of contents')
    parser.add_argument('--dependencies', '-d', action='store_true', help='Include project dependencies')
    parser.add_argument('--pattern', '-n', type=str, help='Filter by name pattern (regex)')
    parser.add_argument('--type', '-y', choices=['module', 'class', 'function'],
                        help='Filter by type')
    parser.add_argument('--exclude-dir', '-e', action='append', default=[],
                        help='Exclude folders with these names (multiple allowed)')
    args = parser.parse_args()

    # os.walk silently yields nothing for a missing path, which would produce
    # empty output with no explanation.
    if not os.path.isdir(args.directory):
        parser.error(f"not a directory: {args.directory}")
    # Reject a bad regex up front instead of failing with a traceback while
    # the documentation is being rendered.
    if args.pattern:
        try:
            re.compile(args.pattern)
        except re.error as exc:
            parser.error(f"invalid --pattern regex: {exc}")

    info = process_directory(args.directory, args.exclude_dir)
    for line in format_documentation(info, args):
        print(line)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment