Last active
January 10, 2026 13:38
-
-
Save GaryLee/a370c8da1a75a63a9a4dd3f22c6e4bff to your computer and use it in GitHub Desktop.
A codegen tool that lets you put the generating code in comments.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python | |
| # coding: utf-8 | |
| """ | |
| Utility functions for comment-based code generation. | |
| Single line code generation: | |
| - Lines starting with the specified print symbol (default: '//? ') within the block are treated as f-string of Python. | |
| - The leading spaces of an f-string line set its indentation in the Python code, not in the generated code. | |
| - The generated code is inserted into the output at the position of the code generation block. | |
| For example(assume there is a python variable default = 5): | |
| //? int var_a = {default}; | |
| int var_a = 5; | |
| //$ | |
| Multiple line usage: | |
| - Any comment line starting with the specified code symbol (default: '//% ') is considered part of a code generation block. | |
| - The code generation block ends when a comment line with the specified block end symbol (default: '//$') is encountered. | |
| - Lines within the block are executed as Python code. | |
| - The whole file shares the same local and global context for code generation. | |
| - A .py file with the same name as the target file can be included for additional context. | |
| - For example, for 'example.c', if 'example.c.py' exists, it will be executed first to provide context. | |
| For example: | |
| //% num_of_variables = 5 | |
| //% space = lambda n: ' ' * n | |
| //% for i in range(num_of_variables): | |
| //? {space(8)}int var_{i} = {i**2}; | |
| int var_0 = 0; | |
| int var_1 = 1; | |
| int var_2 = 4; | |
| int var_3 = 9; | |
| int var_4 = 16; | |
| //$ | |
| In-line code generation: | |
| - In-line code generation allows embedding code generation statements directly within a line of code using a specific syntax. | |
| - The syntax for in-line code generation is: /*?<f-string>*/ <replacement> /*$*/. | |
| - The <f-string> is a Python formatted string that can include expressions to be evaluated. | |
| - The <replacement> is the part of the line that will be replaced by the evaluated result of the f-string. | |
| - The /*$*/ marks the end of the in-line code generation statement. | |
| For example(assume there is a python variable size = 16): | |
| int data1[/*? {size} */ 16 /*$*/]; | |
| int data2[/*? {size * 2} */ 32 /*$*/]; | |
| """ | |
| import sys | |
| import io | |
| import re | |
| import importlib | |
| from pathlib import Path | |
| from dataclasses import dataclass | |
| from collections.abc import Mapping | |
# Alignment codes accepted by AlignSpec.align.
LEFT, RIGHT, CENTER = 0, 1, 2


@dataclass
class AlignSpec:
    """
    Describe how one grid column is justified.

    :param align: Alignment code of the column: LEFT (0), RIGHT (1) or CENTER (2).
    :param width: Minimum width of the rendered cell.
    :param padding: Single fill character used to reach ``width``.
    """

    align: int = LEFT  # one of LEFT / RIGHT / CENTER
    width: int = 0
    padding: str = ' '

    def apply(self, data):
        """
        Render ``data`` as a string justified according to this spec.

        :param data: Any value; it is converted with ``str()``.
        :return: The justified text. Text longer than ``width`` is returned untruncated.
        :raises KeyError: If ``align`` is not one of the three alignment codes.
        """
        # Resolve the alignment first so an unknown code fails before `data` is touched.
        method_name = {LEFT: "ljust", RIGHT: "rjust", CENTER: "center"}[self.align]
        justify = getattr(str, method_name)
        return justify(str(data), self.width, self.padding)
class Grid:
    """
    A simple grid for formatting tabular data.

    Rows are stored in keyed collections (insertion-ordered). Iterating the grid
    yields every row as one string whose columns are padded to a common width.

    :param align_spec: A dict specifying the alignment for each column. The key is the
        column index (0-based), and the value is an AlignSpec object. The dict is used
        as-is (not copied) and its entries are replaced by ``prepare()``.
    :param prefix: A string to be added at the beginning of each row.
    :param suffix: A string to be added at the end of each row.
    """

    def __init__(self, align_spec=None, prefix="", suffix=""):
        if align_spec is None:
            self.align_spec = {}
        else:
            assert isinstance(align_spec, dict), "align_spec must be a dict."
            self.align_spec = align_spec
        self.prefix = prefix
        self.suffix = suffix
        # Recomputed by prepare(); defined here so it exists before the first arrange().
        self.max_column_num = 0
        self._allow_append_row = True
        self._curr_collect = None
        self._row_collections = {}

    @property
    def rows(self):
        """Yield all rows of all collections, collection by collection."""
        for collected in self._row_collections.values():
            yield from collected

    def collection(self, key=None):
        """
        Get a collection of rows.

        :param key: The key of the collection.
        :return: The list of rows stored under ``key``, or an empty list if there is none.
        """
        return self._row_collections.get(key, [])

    def __call__(self, key, allow=True):
        """
        Select the collection that the next ``<<`` operator appends to.

        After that ``<<`` the selection is reset to the ``None`` collection.

        :param key: The key of the collection.
        :param allow: A bool, or a callable returning a truthy/falsy value. If it is
            false, the next ``<<`` operator is ignored; otherwise the row is appended.
        :return: The grid itself, so that ``grid(key) << row`` can be written.
        """
        self._curr_collect = key
        self._allow_append_row = allow() if callable(allow) else bool(allow)
        return self

    def __lshift__(self, row):
        """
        Append a row to the currently selected collection.

        :param row: The row to be added, which should be a list or tuple.
        :return: The grid itself, so that several ``<<`` can be chained.
        """
        if self._allow_append_row:
            assert isinstance(row, (tuple, list)), "Only list can be added as a row."
            self._row_collections.setdefault(self._curr_collect, []).append(row)
        # The selection made by __call__ applies to a single row only.
        self._curr_collect = None
        self._allow_append_row = True
        return self

    def prepare(self):
        """
        Calculate the column count and the maximum width of each column.

        Every column gets an entry in ``align_spec``: left-aligned by default, or the
        existing spec widened to the longest cell while keeping its alignment and
        padding character.
        """
        self.max_column_num = 0
        for columns in self.rows:
            self.max_column_num = max(self.max_column_num, len(columns))
            for i, col in enumerate(columns):
                cell_width = len(str(col))
                if i not in self.align_spec:
                    self.align_spec[i] = AlignSpec(align=LEFT, width=cell_width)
                else:
                    spec = self.align_spec[i]
                    # Carry `padding` over too; rebuilding the spec without it would
                    # silently reset a custom fill character to a space.
                    self.align_spec[i] = AlignSpec(
                        align=spec.align,
                        width=max(spec.width, cell_width),
                        padding=spec.padding,
                    )

    def __iter__(self):
        """Arrange columns and iterate over the formatted rows."""
        return self.arrange()

    def arrange(self):
        """
        Arrange columns and iterate over the formatted rows.

        :return: A generator of strings ``prefix + cells + suffix``, one per row.
        """
        self.prepare()
        for columns in self.rows:
            items = [self.align_spec[i].apply(col) for i, col in enumerate(columns)]
            # Short rows are filled with empty cells so every line has the same width.
            for i in range(len(columns), self.max_column_num):
                items.append(self.align_spec[i].apply(""))
            yield self.prefix + "".join(items) + self.suffix
def import_module(name, location, import_all=False):
    """
    Import a module given its name and file location.

    This is useful when the file name contains special characters and cannot be
    imported with a regular ``import`` statement.

    :param name: The name under which the module is registered in ``sys.modules``.
    :param location: Path of the Python source file to load.
    :param import_all: If True, also copy the module's public names into this module's
        globals, like ``from <module> import *`` (``__all__`` if defined, otherwise
        every name that does not start with an underscore).
    :return: The loaded module object.
    :raises AssertionError: If no import spec can be built for ``location``.
    """
    # `import importlib` alone does not guarantee that the `util` submodule is loaded.
    import importlib.util

    spec = importlib.util.spec_from_file_location(name=name, location=location)
    assert spec is not None, f"Cannot find module file: {location}."
    module = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(name)
    sys.modules[name] = module  # Adds the module to sys.modules
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind when its code fails.
        if previous is None:
            sys.modules.pop(name, None)
        else:
            sys.modules[name] = previous
        raise
    if import_all:
        # from <module> import *  -- dunders such as __name__/__file__ must not be
        # copied, or they would overwrite this module's own metadata.
        public_names = getattr(module, "__all__", None)
        if public_names is None:
            public_names = [n for n in vars(module) if not n.startswith("_")]
        globals().update({n: getattr(module, n) for n in public_names})
    return module
def extract_newline(s):
    """
    Split the trailing newline characters off a string.

    :param s: The string to split.
    :return: A tuple ``(text, newline)`` where ``newline`` is the trailing run of
        ``\\r`` / ``\\n`` characters (possibly empty) and ``text`` is everything before it.
    """
    # Slicing with s[:-len(newline)] would return "" when there is no newline at all
    # (s[:-0] == s[:0]), so the text is taken with rstrip instead.
    text = s.rstrip("\r\n")
    return text, s[len(text):]
def code_print_to_code(lstrip_ln, code_line_print, codegen_func='_codegen_'):
    """
    Convert a code print line to a code generation function call.

    The text after the print prefix becomes a raw f-string literal passed to
    ``codegen_func``. Whitespace between the prefix and the text is kept as the
    indentation of the generated Python statement.

    :param lstrip_ln: The line with leading spaces stripped. It must start with ``code_line_print``.
    :param code_line_print: The code line print prefix.
    :param codegen_func: The code generation function name.
    :return: The generated Python statement, ending with the line's own newline (if any).
    :rtype: str
    """
    rest = lstrip_ln[len(code_line_print):]
    # Detach the line ending first, so that stripping the indentation can never
    # swallow it (empty template) and a missing newline cannot lose the text.
    body = rest.rstrip("\r\n")
    newline = rest[len(body):]
    format_string = body.lstrip()
    indent = body[: len(body) - len(format_string)]
    # A template ending in '"' (e.g. `#include "x.h"`) or containing '"""' would
    # terminate a """-delimited literal early; switch to ''' for those.
    if format_string.endswith('"') or '"""' in format_string:
        quote = "'''"
    else:
        quote = '"""'
    return f"{indent}{codegen_func}(rf{quote}{format_string}{quote}){newline}"
class ReadOnlyMergeDict(Mapping):
    """
    A read-only dictionary that merges multiple dictionaries.

    Lookup order is from the first dictionary to the last. The dictionaries are
    referenced, not copied: item lookup and membership tests always see their
    current content, while the key set used for iteration and ``len()`` is
    collected once, on first use.
    """

    def __init__(self, *dicts):
        super().__init__()
        self.__lazy_dicts_init = False
        self.__dicts = dicts
        self.__keys = set()

    def get_internal_keys(self):
        """
        Get all keys from the merged dictionaries.

        :return: The set of unique keys, built on the first call and reused afterwards.
        """
        if not self.__lazy_dicts_init:
            self.__lazy_dicts_init = True
            for d in self.__dicts:
                self.__keys.update(d.keys())
        return self.__keys

    def __setitem__(self, _, __):
        """Prevent item assignment."""
        raise TypeError("ReadOnlyMergeDict does not support item assignment.")

    def __getitem__(self, key):
        """Get item by key from the first dictionary that contains it."""
        for d in self.__dicts:
            if key in d:
                return d[key]
        raise KeyError(key)

    def __contains__(self, key):
        """Check if key is in any of the merged dictionaries."""
        # Same live lookup as __getitem__, so `key in m` and `m[key]` always agree
        # and a membership test does not need the full key set.
        return any(key in d for d in self.__dicts)

    def items(self):
        """Yield ``(key, value)`` pairs, taking each value from the first dictionary holding the key."""
        # Go through get_internal_keys() so the lazy key set is populated; reading
        # the cache directly yields nothing when items() is the first method used.
        for key in self.get_internal_keys():
            for d in self.__dicts:
                if key in d:
                    yield (key, d[key])
                    break

    def __iter__(self):
        """Iterate over all keys in the merged dictionaries."""
        return iter(self.get_internal_keys())

    def __len__(self):
        """Get the number of unique keys in the merged dictionaries."""
        return len(self.get_internal_keys())
def codegen_inline_proc(ln, local_vars, global_vars):
    """
    Process inline code generation statements in a line.

    Every ``/*?<f-string>*/ <replacement> /*$*/`` fragment keeps its two comment
    markers, and the text between them is replaced by the evaluated f-string.

    :param ln: The line to be processed.
    :param local_vars: The local variables for evaluation (looked up first).
    :param global_vars: The global variables for evaluation.
    :return: The processed line, or ``ln`` unchanged if it contains no inline statement.
    """
    pattern = re.compile(r"""/\*\?(?P<fstr>.*?)\*/(?P<repl>.*?)(?P<end>/\*\s*\$\s*\*/)""")
    new_parts = []
    last = 0
    for m in pattern.finditer(ln):
        fstr = m.group('fstr')
        # Evaluate the fragment as a real raw f-string so that expressions such as
        # {size * 2} work; str.format_map can only look up names, attributes and
        # indexes. Use ''' when the text would end a """-delimited literal early.
        quote = "'''" if (fstr.endswith('"') or '"""' in fstr) else '"""'
        # NOTE: this runs code taken from the processed file, with the same trust
        # level as the //% blocks that are exec'd by the caller.
        value = eval(f"rf{quote}{fstr}{quote}", global_vars, local_vars)
        repl_start, repl_end = m.span('repl')
        end_stop = m.end('end')
        new_parts.append(ln[last:repl_start])
        new_parts.append(value)
        new_parts.append(ln[repl_end:end_stop])
        last = end_stop
    if not new_parts:
        return ln
    new_parts.append(ln[last:])
    return ''.join(new_parts)
def code_gen_proc_file(filepath, inplace=True, comment=r"//", code_symbol="% ", code_block="$", code_print="? ", include_py=True):
    """
    Extract code generation blocks from comment and execute the code to generate code.

    A block starts at the first code/print comment line and ends at the block end
    comment line. Inside a block only comment lines are kept; everything else is
    treated as previously generated output and replaced by the new output, which is
    inserted right before the block end line. Lines outside a block are passed
    through the inline code generation.

    Side effects: the directory of the file is appended to ``sys.path``, and the
    embedded code is run with ``exec`` using this module's globals. Only process
    files whose content you trust.

    :param filepath: The file to be processed.
    :param inplace: If True, replace the original file with the generated file (the original is kept with a .code-gen.bak suffix). If False, create a new file with .code-gen suffix.
    :param comment: The comment string used in the file.
    :param code_symbol: The symbol indicating the start of a code generation line.
    :param code_block: The symbol indicating the end of a code generation block.
    :param code_print: The symbol indicating a print statement in the code generation block.
    :param include_py: If True, include a .py file with the same name as the target file for additional context.
    :raises ValueError: If the file ends while a code generation block is still open.
    """
    filepath = Path(filepath)
    CODE_LINE_PREFIX = rf"{comment}{code_symbol}"
    CODE_LINE_BLOCK_END = rf"{comment}{code_block}"
    CODE_LINE_PRINT = rf"{comment}{code_print}"

    local_vars = dict()
    global_vars = globals()

    # Let the embedded code import helper modules that live next to the file.
    cwd = str(filepath.parent)
    if cwd not in sys.path:
        sys.path.append(cwd)

    if include_py:
        pyfile = filepath.with_suffix(filepath.suffix + '.py')
        if pyfile.exists():
            exec(pyfile.read_text(), global_vars, local_vars)

    out = io.StringIO()
    # Function called by the converted print lines; it writes into the output buffer.
    local_vars["_codegen_"] = lambda *args, **kwargs: print(
        *args, **kwargs, file=out
    )

    source_text = filepath.read_text()
    codegen_block_count = 0
    content = None  # Python source of the open block; None while outside a block.
    for ln in source_text.splitlines(keepends=True):
        lstrip_ln = ln.lstrip()
        if content is None:
            if lstrip_ln.startswith(CODE_LINE_PREFIX):
                codegen_block_count += 1
                content = io.StringIO()
                content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
            elif lstrip_ln.startswith(CODE_LINE_PRINT):
                codegen_block_count += 1
                content = io.StringIO()
                content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
            # The line that opens a block is still emitted like any outside line.
            out.write(codegen_inline_proc(ln, local_vars, global_vars))
            continue

        if lstrip_ln.startswith(CODE_LINE_BLOCK_END):
            # Run the block now so its output lands before the block end line.
            exec(content.getvalue(), global_vars, local_vars)
            content = None
        elif lstrip_ln.startswith(CODE_LINE_PRINT):
            content.write(code_print_to_code(lstrip_ln, CODE_LINE_PRINT, codegen_func='_codegen_'))
        elif lstrip_ln.startswith(CODE_LINE_PREFIX):
            content.write(lstrip_ln[len(CODE_LINE_PREFIX):])
        # Inside code gen block, only comment line will be output.
        # The generated code will be output when the block ends.
        if lstrip_ln.startswith(comment):
            out.write(ln)

    if content is not None:
        # Writing the result now would drop every non-comment line after the opening
        # line without generating anything, so refuse instead of truncating the file.
        raise ValueError(
            f"{filepath}: code generation block is not terminated by '{CODE_LINE_BLOCK_END}'."
        )

    generated = out.getvalue()
    if codegen_block_count == 0 and generated == source_text:
        return  # No code generation block and no inline change: nothing to write.

    if inplace:
        # replace() also overwrites a backup left by a previous run on every platform.
        filepath.replace(filepath.with_suffix(filepath.suffix + ".code-gen.bak"))
        filepath.write_text(generated)
    else:
        codegen_file = filepath.with_suffix(filepath.suffix + ".code-gen")
        codegen_file.write_text(generated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment