Skip to content

Instantly share code, notes, and snippets.

@RasmusRynell
Last active February 15, 2025 10:11
Show Gist options
  • Select an option

  • Save RasmusRynell/2bdc809b4a8575f1e11bf6bca347c6a3 to your computer and use it in GitHub Desktop.

Select an option

Save RasmusRynell/2bdc809b4a8575f1e11bf6bca347c6a3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
This script generates documentation for Python code by statically analyzing source files.
It extracts metadata such as docstrings, type annotations, imports, dependencies, and more.
The output can be used by an AI to understand a library or codebase, and includes options
to filter by type (module, class, function), name patterns, and whether to include private members.
"""
import ast, os, argparse, importlib, re
from typing import List, Dict, Any, Optional
def is_private(name: str) -> bool:
    """Tell whether ``name`` should be treated as a non-public identifier.

    A name is private when it begins with an underscore, except for dunder
    names such as ``__init__`` (leading *and* trailing double underscore),
    which are reported as public.
    """
    is_dunder = name.startswith('__') and name.endswith('__')
    return not is_dunder and name.startswith('_')
def format_annotation(node: ast.AST) -> str:
    """Render a type-annotation AST node as readable source-like text.

    Args:
        node: The expression node taken from an annotation slot.

    Returns:
        A string such as ``Dict[str, Any]``, ``int | None`` or
        ``Tuple[int, ...]``. String constants (forward references) are
        returned without their quotes. Node types without a dedicated branch
        are rendered with ``ast.unparse`` so they never show up as an object
        repr like ``<ast.Call object at 0x...>``.
    """
    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Constant):
        # ``...`` (e.g. in ``Tuple[int, ...]``) would otherwise print as "Ellipsis".
        return '...' if node.value is Ellipsis else str(node.value)
    if isinstance(node, ast.Subscript):
        index = node.slice
        if isinstance(index, ast.Tuple) and index.elts:
            # Multiple subscript arguments: no extra parentheses inside the brackets.
            inner = ', '.join(format_annotation(elt) for elt in index.elts)
        else:
            inner = format_annotation(index)
        return f"{format_annotation(node.value)}[{inner}]"
    if isinstance(node, ast.Attribute):
        return f"{format_annotation(node.value)}.{node.attr}"
    if isinstance(node, ast.Tuple):
        return f"({', '.join(format_annotation(elt) for elt in node.elts)})"
    if isinstance(node, ast.List):
        return f"[{', '.join(format_annotation(elt) for elt in node.elts)}]"
    if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
        return f"{format_annotation(node.left)} | {format_annotation(node.right)}"
    # Anything else (calls, other operators, ...) is rendered as source code.
    return ast.unparse(node)
def extract_docstring(node: ast.AST) -> Optional[str]:
    """Fetch the docstring of a module, class or (async) function node.

    Any other node type yields ``None``; so does a documentable node that
    has no docstring.
    """
    documentable = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)
    if not isinstance(node, documentable):
        return None
    return ast.get_docstring(node)
def extract_imports(tree: ast.AST) -> List[Dict[str, Any]]:
    """Collect every ``import`` and ``from ... import`` found anywhere in ``tree``.

    Args:
        tree: Root node to search; all descendants are visited.

    Returns:
        One dict per imported name. ``import x as y`` yields
        ``{'type': 'import', 'name', 'asname'}``; ``from m import x as y``
        yields ``{'type': 'from_import', 'module', 'name', 'asname'}``.
        For relative imports the leading dots are kept in ``module``
        (``from . import x`` -> ``'.'``, ``from ..pkg import y`` -> ``'..pkg'``).
    """
    imports: List[Dict[str, Any]] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.append({'type': 'import', 'name': alias.name, 'asname': alias.asname})
        elif isinstance(node, ast.ImportFrom):
            # ``node.level`` counts the leading dots of a relative import;
            # ``node.module`` is None for ``from . import x``.
            module = '.' * node.level + (node.module or '')
            for alias in node.names:
                imports.append({
                    'type': 'from_import',
                    'module': module,
                    'name': alias.name,
                    'asname': alias.asname,
                })
    return imports
def extract_exceptions(node: ast.FunctionDef | ast.AsyncFunctionDef) -> List[str]:
exceptions = []
for n in ast.walk(node):
if isinstance(n, ast.Raise):
if isinstance(n.exc, ast.Call) and isinstance(n.exc.func, ast.Name):
exceptions.append(n.exc.func.id)
elif isinstance(n.exc, ast.Name):
exceptions.append(n.exc.id)
return exceptions
def extract_code_examples(docstring: Optional[str]) -> List[str]:
    """Pull doctest-style examples out of a docstring.

    An example begins at a line whose stripped text starts with ``>>> `` or
    ``... `` and continues over every following non-blank line; a blank line
    closes it. Lines are kept exactly as they appear in the docstring and are
    joined with newlines.

    Returns an empty list for a missing or empty docstring.
    """
    if not docstring:
        return []

    found: List[str] = []
    pending: List[str] = []  # lines of the example currently being collected
    for raw in docstring.split('\n'):
        stripped = raw.strip()
        starts_prompt = stripped.startswith(('>>> ', '... '))
        if starts_prompt or (pending and stripped):
            pending.append(raw)
        elif pending:
            # Blank line while inside an example: close it.
            found.append('\n'.join(pending))
            pending = []

    if pending:
        found.append('\n'.join(pending))
    return found
def _describe_parameter(arg: ast.arg, kind: str, prefix: str = '') -> Dict[str, Any]:
    """Build the metadata dict for one parameter (no default recorded yet)."""
    return {
        'name': prefix + arg.arg,
        'annotation': format_annotation(arg.annotation) if arg.annotation else None,
        'has_default': False,
        'kind': kind,
    }


def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> Dict[str, Any]:
    """Collect documentation metadata for a function or method definition.

    Args:
        node: The (async) function definition node.

    Returns:
        A dict with the keys ``name``, ``docstring``, ``is_async``,
        ``decorators`` (source text without the leading ``@``),
        ``parameters``, ``return_type``, ``raises`` and ``examples``.
        Each entry of ``parameters`` has ``name``, ``annotation``,
        ``has_default``, ``kind`` (``positional_only``,
        ``positional_or_keyword``, ``var_positional``, ``keyword_only`` or
        ``var_keyword``) and, when ``has_default`` is true, ``default``
        (source text). ``*args`` / ``**kwargs`` keep their stars in ``name``.
    """
    doc = extract_docstring(node)
    info = {
        'name': node.name,
        'docstring': doc,
        'is_async': isinstance(node, ast.AsyncFunctionDef),
        'decorators': [ast.unparse(d) for d in node.decorator_list],
        'parameters': [],
        'return_type': format_annotation(node.returns) if node.returns else None,
        'raises': extract_exceptions(node),
        'examples': extract_code_examples(doc)
    }
    spec = node.args

    positional = [_describe_parameter(a, 'positional_only') for a in spec.posonlyargs]
    positional += [_describe_parameter(a, 'positional_or_keyword') for a in spec.args]
    # ``defaults`` belongs to the LAST len(defaults) positional parameters,
    # counted across positional-only and regular parameters together.
    first_with_default = len(positional) - len(spec.defaults)
    for entry, default in zip(positional[first_with_default:], spec.defaults):
        entry['has_default'] = True
        entry['default'] = ast.unparse(default)
    info['parameters'].extend(positional)

    if spec.vararg:
        info['parameters'].append(_describe_parameter(spec.vararg, 'var_positional', '*'))

    # ``kw_defaults`` is aligned one-to-one with ``kwonlyargs``; None means "no default".
    for arg, default in zip(spec.kwonlyargs, spec.kw_defaults):
        entry = _describe_parameter(arg, 'keyword_only')
        if default is not None:
            entry['has_default'] = True
            entry['default'] = ast.unparse(default)
        info['parameters'].append(entry)

    if spec.kwarg:
        info['parameters'].append(_describe_parameter(spec.kwarg, 'var_keyword', '**'))
    return info
def extract_instance_attributes(node: ast.ClassDef) -> List[Dict[str, Any]]:
    """
    Extract instance attributes assigned on the instance in the __init__ method.

    Plain assignments (``self.x = 1``) and annotated assignments
    (``self.x: int = 1`` or ``self.x: int``) are both recognised. The
    instance is identified by the name of ``__init__``'s first parameter,
    falling back to ``self`` when there is none. Each attribute is reported
    once, using the first assignment ``ast.walk`` encounters.

    Args:
        node: The class definition to inspect.

    Returns:
        A list of dicts with ``name``, ``has_value`` and ``value`` (source
        text of the assigned expression, or None for a bare annotation).
    """
    attributes: List[Dict[str, Any]] = []
    seen = set()
    for item in node.body:
        if not (isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)) and item.name == '__init__'):
            continue
        receiver_params = item.args.posonlyargs + item.args.args
        receiver = receiver_params[0].arg if receiver_params else 'self'
        for n in ast.walk(item):
            if isinstance(n, ast.Assign):
                targets, value = n.targets, n.value
            elif isinstance(n, ast.AnnAssign):
                # ``value`` is None for a bare annotation such as ``self.x: int``.
                targets, value = [n.target], n.value
            else:
                continue
            for target in targets:
                is_instance_attr = (
                    isinstance(target, ast.Attribute)
                    and isinstance(target.value, ast.Name)
                    and target.value.id == receiver
                )
                if not is_instance_attr or target.attr in seen:
                    continue
                seen.add(target.attr)
                attributes.append({
                    'name': target.attr,
                    'has_value': value is not None,
                    'value': ast.unparse(value) if value is not None else None
                })
    return attributes
# Decorator spellings (as produced by ``ast.unparse``, i.e. without the
# leading ``@``) that mark a method as a read-only property accessor.
_PROPERTY_DECORATORS = frozenset({
    'property',
    'cached_property',
    'functools.cached_property',
    'abstractproperty',
    'abc.abstractproperty',
})


def extract_class_info(node: ast.ClassDef) -> Dict[str, Any]:
    """Collect documentation metadata for a class definition.

    Args:
        node: The class definition node.

    Returns:
        A dict with ``name``, ``docstring``, ``bases`` and ``decorators``
        (source text), ``methods`` and ``properties`` (function-info dicts),
        ``class_attributes`` (dicts with ``name``, ``type``, ``has_value``)
        and ``instance_attributes``. Only statements directly in the class
        body are considered for methods and class attributes.
    """
    info = {
        'name': node.name,
        'docstring': extract_docstring(node),
        'bases': [ast.unparse(base) for base in node.bases],
        'decorators': [ast.unparse(d) for d in node.decorator_list],
        'methods': [],
        'properties': [],
        'class_attributes': [],
        'instance_attributes': extract_instance_attributes(node)
    }
    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            method_info = extract_function_info(item)
            # Decorator strings carry no "@" prefix, so compare the bare names.
            if any(d in _PROPERTY_DECORATORS for d in method_info['decorators']):
                info['properties'].append(method_info)
            else:
                info['methods'].append(method_info)
        elif isinstance(item, ast.AnnAssign):
            # Non-Name targets (e.g. attribute access) are shown as source text.
            name = item.target.id if isinstance(item.target, ast.Name) else ast.unparse(item.target)
            info['class_attributes'].append({
                'name': name,
                'type': format_annotation(item.annotation) if item.annotation else None,
                'has_value': item.value is not None
            })
        elif isinstance(item, ast.Assign):
            for target in item.targets:
                if isinstance(target, ast.Name):
                    info['class_attributes'].append({'name': target.id, 'type': None, 'has_value': True})
    return info
def format_function_signature(func_info: Dict[str, Any]) -> str:
    """Render a function-info dict as a ``def`` signature line.

    Args:
        func_info: Dict with ``name``, ``is_async``, ``decorators``,
            ``return_type`` and ``parameters``. Each parameter needs
            ``name`` and ``annotation``; ``has_default``/``default`` and
            ``kind`` are optional.

    Returns:
        The signature text. Decorators, if any, come first, one per line,
        each prefixed with ``@``. When parameters carry a ``kind``, a ``/``
        is inserted after the last ``positional_only`` parameter and a bare
        ``*`` before the first ``keyword_only`` parameter that is not
        preceded by a ``var_positional`` one. Parameters without ``kind``
        are rendered as-is.
    """
    async_prefix = 'async ' if func_info['is_async'] else ''
    parameters = func_info['parameters']
    rendered = []
    star_emitted = False  # True once "*args" or a bare "*" has been written
    for index, param in enumerate(parameters):
        kind = param.get('kind')
        if kind == 'var_positional':
            star_emitted = True
        elif kind == 'keyword_only' and not star_emitted:
            rendered.append('*')
            star_emitted = True

        text = param['name']
        if param['annotation']:
            text += f": {param['annotation']}"
        if param.get('has_default'):
            text += f" = {param['default']}"
        rendered.append(text)

        if kind == 'positional_only':
            following = parameters[index + 1].get('kind') if index + 1 < len(parameters) else None
            if following != 'positional_only':
                rendered.append('/')

    ret = f" -> {func_info['return_type']}" if func_info['return_type'] else ""
    decorators = ''.join(f"@{d}\n" for d in func_info['decorators'])
    return f"{decorators}{async_prefix}def {func_info['name']}({', '.join(rendered)}){ret}"
# name, optional [extras], then whatever follows (version specifier or "@ url").
_REQUIREMENT_RE = re.compile(r'^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*(.*)$')


def _split_requirement(requirement: str) -> Optional[tuple[str, str]]:
    """Split one requirement string into ``(name, specifier)``.

    Environment markers (everything after ``;``) and extras are discarded.
    The specifier is returned without whitespace, or ``''`` when the
    requirement is unpinned. Returns None when the text does not look like
    ``name [extras] specifier`` (URLs, local paths, option lines, ...).
    """
    text = requirement.split(';', 1)[0].strip()
    match = _REQUIREMENT_RE.match(text)
    if not match:
        return None
    name, spec = match.groups()
    spec = spec.strip()
    if spec.startswith('(') and spec.endswith(')'):
        spec = spec[1:-1].strip()
    if spec and spec[0] not in '=<>!~@':
        return None
    if not spec.startswith('@'):
        spec = ''.join(spec.split())
    return name, spec


def extract_dependencies() -> Dict[str, str]:
    """Read project dependencies from the current working directory.

    Looks at ``requirements.txt`` and then at the ``[project].dependencies``
    table of ``pyproject.toml``; entries from the latter overwrite entries
    of the same name from the former. ``pyproject.toml`` is only read when
    ``tomllib`` (standard library) or ``tomli`` can be imported.

    Returns:
        Mapping of package name to version information: the bare version for
        an exact ``==`` pin in ``requirements.txt``, the specifier text
        otherwise (e.g. ``>=1.0,<2``), and ``'*'`` for unpinned packages.
    """
    dependencies: Dict[str, str] = {}
    if os.path.exists('requirements.txt'):
        with open('requirements.txt', encoding='utf-8') as f:
            for raw in f:
                line = raw.split(' #', 1)[0].strip()  # drop inline comments
                # Skip blanks, comment lines and pip options such as "-r other.txt".
                if not line or line.startswith(('#', '-')):
                    continue
                parsed = _split_requirement(line)
                if parsed is None:
                    continue
                pkg, spec = parsed
                if not spec:
                    dependencies[pkg] = '*'
                elif spec.startswith('==') and not spec.startswith('===') and ',' not in spec:
                    dependencies[pkg] = spec[2:]
                else:
                    dependencies[pkg] = spec
    if os.path.exists('pyproject.toml'):
        try:
            import tomllib  # standard library since Python 3.11
        except ImportError:
            try:
                import tomli as tomllib
            except ImportError:
                tomllib = None  # no TOML parser available: skip pyproject.toml
        if tomllib is not None:
            with open('pyproject.toml', 'rb') as f:
                pyproject = tomllib.load(f)
            for dep in pyproject.get('project', {}).get('dependencies', []):
                parsed = _split_requirement(dep)
                if parsed is None:
                    # Keep unrecognised entries visible rather than dropping them.
                    dependencies[dep] = '*'
                else:
                    pkg, spec = parsed
                    dependencies[pkg] = spec or '*'
    return dependencies
def parse_docstring(docstring: str) -> Dict[str, Any]:
    """Split a docstring into description, params, returns, raises and examples.

    A section starts at a line beginning (case-insensitively) with one of
    the known headers, e.g. ``Args:``, ``Returns:``, ``Raises:`` or
    ``Examples:``; text on the same line after the header is treated as the
    first line of that section. Everything before the first header is the
    description.

    Args:
        docstring: The docstring text to parse.

    Returns:
        A dict with:
        ``description`` - list of stripped, non-empty lines;
        ``params`` - name -> description, accepting ``name: desc`` and
        ``name (type): desc``; following lines without a ``name:`` prefix
        are appended to the previous entry until a blank line;
        ``returns`` - all lines of the section joined with spaces, or None;
        ``raises`` - exception name (dots allowed) -> description;
        ``examples`` - list of stripped, non-empty lines.
    """
    sections = {'description': [], 'params': {}, 'returns': None, 'raises': {}, 'examples': []}
    section_headers = (
        ('params', ('parameters:', 'args:', 'arguments:')),
        ('returns', ('returns:', 'return:')),
        ('raises', ('raises:', 'exceptions:', 'throws:')),
        ('examples', ('examples:',)),
    )
    entry_patterns = {
        # optional */** prefix, name, optional "(type)", colon, description
        'params': re.compile(r'^\*{0,2}(\w+)\s*(?:\([^)]*\))?\s*:\s*(.*)$'),
        'raises': re.compile(r'^([\w.]+)\s*:\s*(.*)$'),
    }
    current_section = 'description'
    last_key = None  # entry that continuation lines get appended to
    returns_lines = []
    for raw in docstring.split('\n'):
        line = raw.strip()
        lowered = line.lower()
        for section, prefixes in section_headers:
            header = next((p for p in prefixes if lowered.startswith(p)), None)
            if header:
                current_section = section
                last_key = None
                line = line[len(header):].strip()  # keep inline text after the header
                break
        if not line:
            last_key = None  # a blank line ends any continuation
            continue
        if current_section in entry_patterns:
            bucket = sections[current_section]
            m = entry_patterns[current_section].match(line)
            if m:
                last_key, desc = m.groups()
                bucket[last_key] = desc.strip()
            elif last_key is not None:
                bucket[last_key] = f"{bucket[last_key]} {line}".strip()
        elif current_section == 'returns':
            returns_lines.append(line)
        else:
            sections[current_section].append(line)
    if returns_lines:
        sections['returns'] = ' '.join(returns_lines)
    return sections
def generate_toc(info: List[Dict[str, Any]]) -> List[str]:
    """Build a Markdown table of contents for the collected items.

    ``info`` is read as a flat sequence in which each ``module`` entry is
    followed by the classes and functions that belong to it, so every class
    or function is listed once, nested under the module entry preceding it.

    Args:
        info: Item dicts carrying at least ``type`` and ``name``.

    Returns:
        The TOC lines, starting with the ``# Table of Contents`` heading.
    """
    # NOTE(review): link targets are just the lower-cased name; whether they
    # resolve depends on how the renderer derives anchors from the headings.
    toc = ['# Table of Contents\n']
    member_labels = {'class': 'Class', 'function': 'Function'}
    for item in info:
        kind, name = item['type'], item['name']
        anchor = name.lower()
        if kind == 'module':
            toc.append(f"- [Module: {name}](#{anchor})")
        elif kind in member_labels:
            # Two spaces so Markdown nests the entry under its module bullet.
            toc.append(f"  - [{member_labels[kind]}: {name}](#{anchor})")
    return toc
def _heading_signature(func_info: Dict[str, Any]) -> str:
    """Return the signature on a single line so it fits in a Markdown heading."""
    # Decorators are rendered on their own lines; a heading cannot span lines.
    return ' '.join(format_function_signature(func_info).splitlines())


def _format_examples(heading: str, examples: List[str]) -> List[str]:
    """Render examples as fenced python blocks under ``heading`` (nothing if empty)."""
    out: List[str] = []
    if examples:
        out.append(heading)
        for ex in examples:
            out.extend(("```python", ex, "```"))
    return out


def _format_module_section(item: Dict[str, Any]) -> List[str]:
    """Render the heading, file path, version, description and imports of a module."""
    out = [f"# Module: {item['name']}", f"File: `{item['file_path']}`"]
    if 'version' in item:
        out.append(f"Version: {item['version']}")
    if item['docstring']:
        doc = parse_docstring(item['docstring'])
        if doc['description']:
            out.append(' '.join(doc['description']))
    if item['imports']:
        out.append("## Imports")
        for imp in item['imports']:
            as_part = f" as {imp['asname']}" if imp['asname'] else ""
            if imp['type'] == 'import':
                out.append(f"- `import {imp['name']}{as_part}`")
            else:
                out.append(f"- `from {imp['module']} import {imp['name']}{as_part}`")
    return out


def _format_class_section(item: Dict[str, Any], include_private: bool) -> List[str]:
    """Render a class: heading, docstring, attributes, properties, methods, examples."""
    def visible(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        return [e for e in entries if include_private or not is_private(e['name'])]

    bases = f"({', '.join(item['bases'])})" if item['bases'] else ""
    out = [f"## Class: {item['name']}{bases}"]
    if item['docstring']:
        out.append(item['docstring'])

    class_attrs = visible(item['class_attributes'])
    if class_attrs:
        out.append("### Class Attributes:")
        for attr in class_attrs:
            type_ann = f": {attr['type']}" if attr['type'] else ""
            out.append(f"- {attr['name']}{type_ann}")

    instance_attrs = visible(item.get('instance_attributes') or [])
    if instance_attrs:
        out.append("### Instance Attributes:")
        for attr in instance_attrs:
            value = f" = {attr['value']}" if attr['has_value'] and attr['value'] else ""
            out.append(f"- {attr['name']}{value}")

    props = visible(item['properties'])
    if props:
        out.append("### Properties:")
        for prop in props:
            out.append(f"#### @property {prop['name']}")
            if prop['docstring']:
                out.append(prop['docstring'])

    methods = visible(item['methods'])
    if methods:
        out.append("### Methods:")
        for method in methods:
            out.append(f"#### {_heading_signature(method)}")
            if method['docstring']:
                out.append(method['docstring'])

    # Examples come from the class docstring and are emitted once per class.
    out.extend(_format_examples("#### Examples:", extract_code_examples(item['docstring'])))
    return out


def _format_function_section(item: Dict[str, Any]) -> List[str]:
    """Render a module-level function: signature heading, docstring, raises, examples."""
    out = [f"## Function: {_heading_signature(item)}"]
    if item['docstring']:
        out.append(item['docstring'])
    if item['raises']:
        out.append("### Raises:")
        out.extend(f"- `{exc}`" for exc in item['raises'])
    out.extend(_format_examples("### Examples:", item['examples']))
    return out


def format_documentation(info: List[Dict[str, Any]], args: argparse.Namespace) -> List[str]:
    """Turn the collected items into Markdown output lines.

    Args:
        info: Flat list of module, class and function dicts (``type`` key).
        args: Parsed options; reads ``toc``, ``dependencies``,
            ``include_private``, ``pattern`` (regex searched in the item
            name) and ``type``.

    Returns:
        The output lines in order: optional table of contents, optional
        dependency list (only when dependencies were found), then one
        section per item that passes the filters. Empty strings are never
        returned.
    """
    lines: List[str] = []
    if args.toc:
        lines.extend(generate_toc(info))
        lines.append('---')
    if args.dependencies:
        deps = extract_dependencies()
        if deps:
            lines.append('# Project Dependencies')
            lines.extend(f'- `{pkg}`: {ver}' for pkg, ver in deps.items())
            lines.append('---')

    for item in info:
        name = item.get('name', '')
        if not args.include_private and is_private(name):
            continue
        if args.pattern and not re.search(args.pattern, name):
            continue
        if args.type and item['type'] != args.type:
            continue
        if item['type'] == 'module':
            lines.extend(_format_module_section(item))
        elif item['type'] == 'class':
            lines.extend(_format_class_section(item, args.include_private))
        elif item['type'] == 'function':
            lines.extend(_format_function_section(item))
    # Guard against empty entries (e.g. an empty example) reaching the output.
    return [l for l in lines if l]
def _static_version(tree: ast.Module) -> Optional[str]:
    """Return the string literal assigned to a top-level ``__version__``, if any."""
    for node in tree.body:
        if isinstance(node, ast.Assign):
            targets, value = node.targets, node.value
        elif isinstance(node, ast.AnnAssign):
            targets, value = [node.target], node.value
        else:
            continue
        is_version = any(isinstance(t, ast.Name) and t.id == '__version__' for t in targets)
        if is_version and isinstance(value, ast.Constant) and isinstance(value.value, str):
            return value.value
    return None


def extract_info_from_file(filepath: str) -> List[Dict[str, Any]]:
    """Parse one Python file and describe its module, functions and classes.

    The file is only parsed, never imported or executed; ``__version__`` is
    picked up when it is assigned a string literal at module level.

    Args:
        filepath: Path of the source file.

    Returns:
        A list starting with the module dict (``type``, ``name``,
        ``docstring``, ``file_path``, ``imports`` and optionally
        ``version``), followed by one dict per top-level function
        (``type == 'function'``) and class (``type == 'class'``) in source
        order. An empty list is returned, after printing a message, when the
        file cannot be read or parsed.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            tree = ast.parse(f.read(), filename=filepath)
    except SyntaxError:
        print(f"Syntax error in file: {filepath}")
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError for files that are not UTF-8.
        print(f"Could not read file: {filepath} ({exc})")
        return []

    module_name = os.path.splitext(os.path.basename(filepath))[0]
    module_info = {
        'type': 'module',
        'name': module_name,
        'docstring': extract_docstring(tree),
        'file_path': os.path.relpath(filepath),
        'imports': extract_imports(tree)
    }
    version = _static_version(tree)
    if version is not None:
        module_info['version'] = version

    info = [module_info]
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            func = extract_function_info(node)
            func['type'] = 'function'
            info.append(func)
        elif isinstance(node, ast.ClassDef):
            cls = extract_class_info(node)
            cls['type'] = 'class'
            info.append(cls)
    return info
def process_directory(directory: str, excluded_dirs: List[str]) -> List[Dict[str, Any]]:
    """Walk ``directory`` and gather the extracted info of every ``.py`` file.

    Sub-directories whose name appears in ``excluded_dirs`` are pruned, so
    nothing beneath them is visited. Results of all files are concatenated
    in the order ``os.walk`` yields them.
    """
    collected: List[Dict[str, Any]] = []
    for root, dirs, files in os.walk(directory):
        # In-place update: os.walk only descends into what is left in ``dirs``.
        dirs[:] = [name for name in dirs if name not in excluded_dirs]
        python_files = (name for name in files if name.endswith(".py"))
        for name in python_files:
            collected.extend(extract_info_from_file(os.path.join(root, name)))
    return collected
def main():
    """Command-line entry point: parse options, scan the directory, print the docs.

    Exits through ``parser.error`` (usage message, status 2) when the given
    directory does not exist or ``--pattern`` is not a valid regular
    expression, instead of silently printing nothing or failing mid-run.
    """
    parser = argparse.ArgumentParser(description='Generate Python code documentation')
    parser.add_argument('directory', help='Directory containing Python files to document')
    # NOTE(review): --format is accepted but nothing visible here reads it.
    parser.add_argument('--format', '-f', choices=['text', 'markdown'], default='markdown',
                        help='Output format (default: markdown)')
    parser.add_argument('--include-private', '-p', action='store_true',
                        help='Include private members (default excludes them)')
    parser.add_argument('--toc', '-t', action='store_true', help='Include table of contents')
    parser.add_argument('--dependencies', '-d', action='store_true', help='Include project dependencies')
    parser.add_argument('--pattern', '-n', type=str, help='Filter by name pattern (regex)')
    parser.add_argument('--type', '-y', choices=['module', 'class', 'function'],
                        help='Filter by type')
    parser.add_argument('--exclude-dir', '-e', action='append', default=[],
                        help='Exclude folders with these names (multiple allowed)')
    args = parser.parse_args()

    # os.walk yields nothing for a missing path, which would look like "no docs".
    if not os.path.isdir(args.directory):
        parser.error(f"not a directory: {args.directory}")
    if args.pattern:
        try:
            re.compile(args.pattern)
        except re.error as exc:
            parser.error(f"invalid --pattern regex: {exc}")

    info = process_directory(args.directory, args.exclude_dir)
    for line in format_documentation(info, args):
        print(line)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment