Skip to content

Instantly share code, notes, and snippets.

@GaryLee
Last active January 10, 2026 13:38
Show Gist options
  • Select an option

  • Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.

Select an option

Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.
A codegen tool which can put the generating code in comment.
#!python
# coding: utf-8
"""
Utility functions for comment-based code generation.
Single line code generation:
- Lines starting with the specified print symbol (default: '//? ') within the block are treated as f-string of Python.
- The leading spaces of an f-string line set its indentation in the Python generator code, not in the generated code.
- The generated code is inserted into the output at the position of the code generation block.
For example (assuming there is a Python variable default = 5):
//? int var_a = {default};
int var_a = 5;
//$
Multiple line usage:
- Any comment line starting with the specified code symbol (default: '//% ') is considered part of a code generation block.
- The code generation block ends when a comment line with the specified block end symbol (default: '//$') is encountered.
- Lines within the block are executed as Python code.
- The whole file shares the same local and global context for code generation.
- A .py file with the same name as the target file can be included for additional context.
- For example, for 'example.c', if 'example.c.py' exists, it will be executed first to provide context.
For example:
//% num_of_variables = 5
//% space = lambda n: ' ' * n
//% for i in range(num_of_variables):
//? {space(8)}int var_{i} = {i**2};
int var_0 = 0;
int var_1 = 1;
int var_2 = 4;
int var_3 = 9;
int var_4 = 16;
//$
In-line code generation:
- In-line code generation allows embedding code generation statements directly within a line of code using a specific syntax.
- The syntax for in-line code generation is: /*?<f-string>*/ <replacement> /*$*/.
- The <f-string> is a Python formatted string that can include expressions to be evaluated.
- The <replacement> is the part of the line that will be replaced by the evaluated result of the f-string.
- The /*$*/ marks the end of the in-line code generation statement.
For example (assuming there is a Python variable size = 16):
int data1[/*? {size} */ 16 /*$*/];
int data2[/*? {size * 2} */ 32 /*$*/];
"""
import importlib
import importlib.util
import io
import re
import sys
from collections.abc import Mapping
from dataclasses import dataclass
from dataclasses import replace
from pathlib import Path
# Alignment selectors accepted by AlignSpec.align.
LEFT, RIGHT, CENTER = 0, 1, 2

# Maps each alignment selector to the str method that performs the padding.
_JUSTIFIERS = {
    LEFT: str.ljust,
    RIGHT: str.rjust,
    CENTER: str.center,
}


@dataclass
class AlignSpec:
    """
    Describe how the cells of one grid column are rendered.

    :param align: Alignment selector: LEFT (0), RIGHT (1) or CENTER (2).
    :param width: Minimum width of the rendered cell.
    :param padding: Fill character used to pad the cell up to ``width``.
    """

    align: int = LEFT  # 0: left, 1: right, 2: center
    width: int = 0
    padding: str = ' '

    def apply(self, data):
        """
        Render ``data`` as text padded according to this specification.

        :param data: The cell value; it is converted with ``str()``.
        :return: The padded string.
        :raises KeyError: If ``align`` is not LEFT, RIGHT or CENTER.
        """
        justify = _JUSTIFIERS[self.align]
        text = str(data)
        return justify(text, self.width, self.padding)
class Grid:
    """
    A simple grid for formatting tabular data.

    Rows are stored in named collections and rendered in collection insertion
    order. Column widths are computed from the stored rows when the grid is
    iterated.

    :param align_spec: A dict specifying the alignment for each column. The key is the column index (0-based), and the value is an AlignSpec object.
        The dict is stored by reference and is updated by :meth:`prepare`.
    :param prefix: A string to be added at the beginning of each row.
    :param suffix: A string to be added at the end of each row.
    """

    def __init__(self, align_spec=None, prefix="", suffix=""):
        if align_spec is None:
            self.align_spec = {}
        else:
            assert isinstance(align_spec, dict), "align_spec must be a dict."
            self.align_spec = align_spec
        self.prefix = prefix
        self.suffix = suffix
        self.max_column_num = 0
        self._allow_append_row = True
        self._curr_collect = None
        self._row_collections = {}

    @property
    def rows(self):
        """Yield every row of every collection, in collection insertion order."""
        for collected in self._row_collections.values():
            yield from collected

    def collection(self, key=None):
        """
        Get a collection of rows.

        :param key: The key of the collection.
        :return: The list of rows stored under ``key``, or a new empty list
            when no row has been stored under that key.
        """
        return self._row_collections.get(key, [])

    def __call__(self, key, allow=True):
        """
        Select the collection that the next ``<<`` operator appends to.

        After that ``<<`` the selection is reset to the default collection
        (key ``None``) and appending is allowed again.

        :param key: The key of the collection.
        :param allow: A boolean, or a callable returning one. If it evaluates
            to False, the next ``<<`` operator is ignored.
        :return: The Grid itself, so that ``grid(key) << row`` can be written.
        """
        self._curr_collect = key
        self._allow_append_row = allow() if callable(allow) else bool(allow)
        return self

    def __lshift__(self, row):
        """
        Append a row to the currently selected collection.

        :param row: The row to be added, which should be a list or tuple.
        :return: The Grid itself, so that several ``<<`` can be chained.
        """
        if self._allow_append_row:
            assert isinstance(row, (tuple, list)), "Only list can be added as a row."
            self._row_collections.setdefault(self._curr_collect, []).append(row)
        # A selection made through __call__ only applies to a single append.
        self._curr_collect = None
        self._allow_append_row = True
        return self

    def prepare(self):
        """
        Calculate the column count and the maximum width of each column.

        Columns without an entry in ``align_spec`` get a left-aligned
        AlignSpec. Existing entries are replaced by a copy whose width is
        widened to the longest cell; every other field (alignment, padding)
        is kept unchanged.
        """
        self.max_column_num = 0
        for columns in self.rows:
            self.max_column_num = max(self.max_column_num, len(columns))
            for i, col in enumerate(columns):
                cell_width = len(str(col))
                if i not in self.align_spec:
                    self.align_spec[i] = AlignSpec(align=LEFT, width=cell_width)
                else:
                    spec = self.align_spec[i]
                    # replace() copies all fields, so a custom padding
                    # character survives the width update.
                    self.align_spec[i] = replace(spec, width=max(spec.width, cell_width))

    def __iter__(self):
        """Arrange columns and iterate over the formatted rows."""
        return self.arrange()

    def arrange(self):
        """
        Arrange columns and iterate over the formatted rows.

        :return: A generator of strings, one per row, each wrapped in
            ``prefix`` and ``suffix``. Rows shorter than the widest row are
            completed with empty cells.
        """
        self.prepare()
        for columns in self.rows:
            items = [self.align_spec[i].apply(col) for i, col in enumerate(columns)]
            for i in range(len(columns), self.max_column_num):
                items.append(self.align_spec[i].apply(""))
            yield self.prefix + "".join(items) + self.suffix
def import_module(name, location, import_all=False):
    """
    Import a module given its name and file location.

    This is useful when the file name contains special characters and so
    cannot be imported with a regular ``import`` statement.

    :param name: The name under which the module is registered in ``sys.modules``.
    :param location: The path of the Python source file.
    :param import_all: If True, also copy the module's public names into this
        module's globals, like ``from <module> import *``.
    :return: The imported module object.
    :raises AssertionError: If no import spec can be created for ``location``.
    """
    spec = importlib.util.spec_from_file_location(name=name, location=location)
    assert spec is not None, f"Cannot find module file: {location}."
    module = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(name)
    sys.modules[name] = module  # Adds the module to sys.modules
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        if previous is None:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    if import_all:
        # Mirror ``from <module> import *``: honour __all__ when present,
        # otherwise skip underscore-prefixed names. Copying the whole
        # __dict__ would overwrite this module's own __name__, __file__,
        # __spec__, ... with those of the imported module.
        public_names = getattr(module, "__all__", None)
        if public_names is None:
            public_names = [n for n in vars(module) if not n.startswith("_")]
        globals().update({n: getattr(module, n) for n in public_names})
    return module
def extract_newline(s):
    """
    Split a string into its content and its trailing newline characters.

    :param s: The string to split.
    :return: A tuple ``(content, newline)`` where ``newline`` is the trailing
        run of ``\\r`` / ``\\n`` characters (possibly empty) and
        ``content + newline == s``.
    """
    # rstrip avoids the ``s[:-0]`` pitfall, which yields "" instead of ``s``
    # when the string has no trailing newline.
    content = s.rstrip("\r\n")
    return content, s[len(content):]
def code_print_to_code(lstrip_ln, code_line_print, codegen_func='_codegen_'):
    """
    Convert a code print line to a code generation function call.

    The text after the print prefix is wrapped in a raw triple-quoted
    f-string and passed to ``codegen_func``. Blanks between the prefix and the
    text become the indentation of the generated Python statement, and the
    line terminator of the input line is kept after the call.

    Limitation: the text is embedded in a raw string literal, so it must not
    end with a backslash.

    :param lstrip_ln: The line with leading spaces stripped. It is expected to start with ``code_line_print``.
    :param code_line_print: The code line print prefix.
    :param codegen_func: The code generation function name.
    :return: The generated code line.
    :rtype: str
    """
    rest = lstrip_ln[len(code_line_print):]
    # Strip blanks only: a bare lstrip() would also swallow the line
    # terminator of an empty print line and move it into the indentation.
    text = rest.lstrip(" \t")
    indent = rest[:len(rest) - len(text)]
    # Separate the text from its line terminator; this also works for a last
    # line that has no terminator at all.
    format_string = text.rstrip("\r\n")
    newline = text[len(format_string):]
    quote = '"""'
    if quote in format_string or format_string.endswith('"'):
        # A trailing double quote (e.g. ``#include "{name}"``) would merge
        # with the closing delimiter, so switch to the other triple quote.
        quote = "'''"
    return f"{indent}{codegen_func}(rf{quote}{format_string}{quote}){newline}"
class ReadOnlyMergeDict(Mapping):
    """
    A read-only dictionary that merges multiple dictionaries.

    Lookup order is from the first dictionary to the last. The object is a
    live view: changes made to the underlying dictionaries are visible
    through every method.

    :param dicts: The dictionaries to merge, in lookup order.
    """

    def __init__(self, *dicts):
        super().__init__()
        self.__dicts = dicts

    def get_internal_keys(self):
        """
        Get all keys from the merged dictionaries.

        :return: A new set with the union of the keys of all dictionaries.
        """
        # Computed on demand rather than cached, so that membership, length
        # and iteration never disagree with __getitem__ after an underlying
        # dictionary has changed.
        keys = set()
        for d in self.__dicts:
            keys.update(d.keys())
        return keys

    def __setitem__(self, _, __):
        """Prevent item assignment."""
        raise TypeError("ReadOnlyMergeDict does not support item assignment.")

    def __getitem__(self, key):
        """
        Get item by key.

        :raises KeyError: If no dictionary contains ``key``.
        """
        for d in self.__dicts:
            if key in d:
                return d[key]
        raise KeyError(key)

    def __contains__(self, key):
        """Check if key is in the merged dictionaries."""
        return any(key in d for d in self.__dicts)

    def items(self):
        """
        Get all items from the merged dictionaries.

        :return: A generator of ``(key, value)`` tuples; for a key present in
            several dictionaries the value of the first one is used.
        """
        for key in self.get_internal_keys():
            yield (key, self[key])

    def __iter__(self):
        """Iterate over all keys in the merged dictionaries."""
        return iter(self.get_internal_keys())

    def __len__(self):
        """Get the number of unique keys in the merged dictionaries."""
        return len(self.get_internal_keys())
# Matches ``/*?<f-string>*/ <replacement> /*$*/`` within a single line.
_INLINE_CODEGEN_PATTERN = re.compile(r"""/\*\?(?P<fstr>.*?)\*/(?P<repl>.*?)(?P<end>/\*\s*\$\s*\*/)""")


def codegen_inline_proc(ln, local_vars, global_vars):
    """
    Process inline code generation statements in a line.

    Every ``/*?<f-string>*/ <replacement> /*$*/`` statement keeps its two
    marker comments, and the text between them is replaced by the value of
    ``<f-string>``. The f-string is evaluated as a raw Python f-string, so
    arbitrary expressions such as ``{size * 2}`` are supported.

    NOTE: the f-string is evaluated with ``eval``; only process trusted files.

    :param ln: The line to be processed.
    :param local_vars: The local variables for evaluation (looked up first).
    :param global_vars: The global variables for evaluation. Must be a dict.
    :return: The processed line, or ``ln`` itself when it contains no inline statement.
    :raises NameError: If the f-string references an undefined variable.
    """
    pieces = []
    last = 0
    for m in _INLINE_CODEGEN_PATTERN.finditer(ln):
        fstr = m.group('fstr')
        quote = '"""'
        if quote in fstr or fstr.endswith('"'):
            # Avoid the f-string text merging with the closing delimiter.
            quote = "'''"
        # str.format_map() can only look up plain names; building a real
        # f-string literal lets the text contain full Python expressions.
        value = eval(f"rf{quote}{fstr}{quote}", global_vars, local_vars)
        repl_start, repl_end = m.span('repl')
        end_stop = m.end('end')
        pieces.append(ln[last:repl_start])  # text up to and including /*?...*/
        pieces.append(value)  # freshly generated replacement
        pieces.append(ln[repl_end:end_stop])  # the /*$*/ end marker
        last = end_stop
    if not pieces:
        return ln
    pieces.append(ln[last:])
    return ''.join(pieces)
def code_gen_proc_file(filepath, inplace=True, comment=r"//", code_symbol="% ", code_block="$", code_print="? ", include_py=True, encoding=None):
    """
    Extract code generation blocks from comment and execute the code to generate code.

    A block starts at the first comment line carrying the code or print
    symbol and ends at the block end comment. Inside a block only comment
    lines are kept; the previously generated lines are dropped and the newly
    generated ones are inserted right before the block end comment. Lines
    outside blocks are passed through in-line code generation.

    NOTE: the Python code found in the file (and in the optional companion
    .py file) is run with ``exec`` using this module's globals; only process
    trusted files.

    :param filepath: The file to be processed.
    :param inplace: If True, replace the original file with the generated file (the original is kept with a .code-gen.bak suffix). If False, create a new file with .code-gen suffix.
    :param comment: The comment string used in the file.
    :param code_symbol: The symbol indicating the start of a code generation line.
    :param code_block: The symbol indicating the end of a code generation block.
    :param code_print: The symbol indicating a print statement in the code generation block.
    :param include_py: If True, include a .py file with the same name as the target file for additional context.
    :param encoding: The text encoding used to read and write the files. None uses the platform default.
    :raises ValueError: If a code generation block is not closed before the end of the file.
    """
    if isinstance(filepath, str):
        filepath = Path(filepath)

    OUT_CODE_BLOCK, IN_CODE_BLOCK = 0, 1
    CODE_LINE_PREFIX = rf"{comment}{code_symbol}"
    CODE_LINE_BLOCK_END = rf"{comment}{code_block}"
    CODE_LINE_PRINT = rf"{comment}{code_print}"

    # One shared context for the whole file: every block sees the variables
    # defined by the previous blocks.
    local_vars = dict()
    global_vars = globals()

    # Let the generator code import modules located next to the target file.
    cwd = str(filepath.parent)
    if cwd not in sys.path:
        sys.path.append(cwd)

    if include_py:
        pyfile = filepath.with_suffix(filepath.suffix + '.py')
        if pyfile.exists():
            exec(pyfile.read_text(encoding=encoding), global_vars, local_vars)

    out = io.StringIO()
    # The print lines are turned into calls to this function.
    local_vars["_codegen_"] = lambda *args, **kwargs: print(
        *args, **kwargs, file=out
    )

    next_state = OUT_CODE_BLOCK
    codegen_block_count = 0
    inline_changed = False
    block_start_line = 0
    content = None
    source_text = filepath.read_text(encoding=encoding)
    for lineno, ln in enumerate(source_text.splitlines(keepends=True), start=1):
        lstrip_ln = ln.lstrip()
        state = next_state
        if state == OUT_CODE_BLOCK:
            if lstrip_ln.startswith(CODE_LINE_PREFIX):
                next_state = IN_CODE_BLOCK
                codegen_block_count += 1
                block_start_line = lineno
                content = io.StringIO()
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                next_state = IN_CODE_BLOCK
                codegen_block_count += 1
                block_start_line = lineno
                content = io.StringIO()
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            processed = codegen_inline_proc(ln, local_vars, global_vars)
            if processed != ln:
                inline_changed = True
            out.write(processed)
        elif state == IN_CODE_BLOCK:
            if lstrip_ln.startswith(CODE_LINE_BLOCK_END):
                next_state = OUT_CODE_BLOCK
                # The generated lines are printed to ``out`` here, i.e. right
                # before the block end comment written below.
                exec(content.getvalue(), global_vars, local_vars)
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            elif lstrip_ln.startswith(CODE_LINE_PREFIX):
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
            # Inside code gen block, only comment line will be output.
            # The generated code will be output when the block ends.
            if lstrip_ln.startswith(comment):
                out.write(ln)
        else:
            assert False, "Invalid state"

    if next_state == IN_CODE_BLOCK:
        # Writing the output now would silently delete every non-comment line
        # that follows the unterminated block.
        raise ValueError(
            f"{filepath}: code generation block starting at line {block_start_line} "
            f"is not terminated by '{CODE_LINE_BLOCK_END}'."
        )

    if codegen_block_count == 0 and not inline_changed:
        return  # No code generation blocks found and no in-line statement changed.

    generated = out.getvalue()
    if inplace:
        backup = filepath.with_suffix(filepath.suffix + ".code-gen.bak")
        # replace() overwrites a backup left by a previous run on every
        # platform, whereas rename() fails on Windows when it exists.
        filepath.replace(backup)
        filepath.write_text(generated, encoding=encoding)
    else:
        codegen_file = filepath.with_suffix(filepath.suffix + ".code-gen")
        codegen_file.write_text(generated, encoding=encoding)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment