Created
January 23, 2026 21:44
-
-
Save renanliberato/1dcb45412dfa795e5ea207297126fed1 to your computer and use it in GitHub Desktop.
files-to-prompt.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| import os | |
| from pathlib import Path | |
| DEFAULT_EXCLUDE_DIRS = {".git", ".hg", ".svn", "node_modules", ".venv", "venv", "__pycache__"} | |
def is_binary_file(path: Path, sniff_bytes: int = 8192) -> bool:
    """Report whether *path* looks like a binary file.

    Only the first ``sniff_bytes`` bytes are inspected; the file counts as
    binary when that prefix contains a NUL byte. Any failure to open or read
    the file is also reported as ``True`` so unreadable files get skipped.
    """
    try:
        with path.open("rb") as handle:
            head = handle.read(sniff_bytes)
    except Exception:
        # Unreadable files are treated the same as binary ones.
        return True
    return head.find(b"\x00") != -1
def rel_for_display(root: Path, p: Path) -> str:
    """Return a "/"-separated display string for *p*.

    When the resolved *p* lies under the resolved *root*, the result is the
    path relative to *root*. Otherwise the resolved absolute path is returned.
    Both branches use forward slashes so the output can always be split on
    "/" regardless of the platform's native separator.
    """
    resolved = p.resolve()
    try:
        return resolved.relative_to(root.resolve()).as_posix()
    except (ValueError, OSError, RuntimeError):
        # ValueError: p is not under root. OSError/RuntimeError: root could
        # not be resolved. Either way, fall back to the absolute path.
        return resolved.as_posix()
def build_tree_listing(rel_paths: list[str]) -> str:
    """Render "/"-separated paths as a ``tree``-style text listing.

    Each path is split on "/" (empty segments are dropped) and merged into a
    nested dict in which directories map to dicts and files map to ``None``.
    Within a directory, sub-directories are listed before files and each
    group is ordered case-insensitively by name.

    Returns the listing joined with newlines, or ``"(no files)"`` when
    nothing was rendered.
    """
    tree: dict = {}
    for rel in rel_paths:
        parts = [segment for segment in rel.split("/") if segment]
        cur = tree
        last_index = len(parts) - 1
        for i, part in enumerate(parts):
            if i == last_index:
                # Leaf: record as a file unless a directory already exists.
                cur.setdefault(part, None)
                continue
            child = cur.get(part)
            if child is None:
                # Missing, or previously recorded as a file: a later path
                # needs it as a directory, so promote it instead of crashing.
                child = cur[part] = {}
            cur = child

    lines: list[str] = []

    def walk(node: dict, prefix: str = "") -> None:
        items = sorted(node.items(), key=lambda kv: (kv[1] is None, kv[0].lower()))
        for idx, (name, child) in enumerate(items):
            last = idx == len(items) - 1
            connector = "└── " if last else "├── "
            lines.append(prefix + connector + name)
            if isinstance(child, dict):
                # Continuation prefix is as wide as the connector (4 columns)
                # so nested entries line up under their parent's name.
                extension = "    " if last else "│   "
                walk(child, prefix + extension)

    walk(tree, "")
    return "\n".join(lines) if lines else "(no files)"
def next_archive_name(base_dir: Path, prefix: str = "prompt_", ext: str = ".txt") -> Path:
    """Return the first unused numbered path inside *base_dir*.

    Candidates are ``<prefix>0001<ext>``, ``<prefix>0002<ext>``, ... and the
    first one that does not exist yet is returned. Nothing is created.
    """

    def numbered(serial: int) -> Path:
        return base_dir / f"{prefix}{serial:04d}{ext}"

    serial = 1
    path = numbered(serial)
    while path.exists():
        serial += 1
        path = numbered(serial)
    return path
def read_paths_from_stdin() -> list[str]:
    """Read one batch of pasted file paths from standard input.

    Lines are stripped; reading stops at the first empty line or at EOF.
    Lines starting with ``#`` are ignored.

    Returns:
        The collected paths, de-duplicated in first-seen order, or the
        single-element sentinel ``["__QUIT__"]`` when the user types
        ``quit``/``exit`` (case-insensitive) or when EOF arrives before any
        path was collected.
    """
    print("\nPaste file paths (one per line).")
    print("- End input with an empty line.")
    print("- Lines starting with # are ignored.")
    print("- Type 'quit' or 'exit' on a line by itself to stop.\n")

    collected: list[str] = []
    while True:
        try:
            line = input()
        except EOFError:
            if not collected:
                # The stream is exhausted and there is nothing to bundle.
                # Returning an empty list would make a caller that re-prompts
                # on empty input loop forever, so signal quit instead.
                return ["__QUIT__"]
            # EOF after some paths behaves like "done".
            break
        stripped = line.strip()
        if stripped.lower() in ("quit", "exit"):
            return ["__QUIT__"]
        if stripped == "":
            break
        if stripped.startswith("#"):
            continue
        collected.append(stripped)

    # dict preserves insertion order, so this de-dupes while keeping order.
    return list(dict.fromkeys(collected))
def resolve_input_path(root: Path, raw: str) -> Path:
    """Turn a pasted path string into a resolved ``Path``.

    ``~`` is expanded first. Absolute paths are resolved as given; relative
    paths are interpreted relative to *root* before resolving.
    """
    candidate = Path(raw).expanduser()
    target = candidate if candidate.is_absolute() else root / candidate
    return target.resolve()
def write_bundle(
    root: Path,
    files: list[Path],
    out_path: Path,
    include_binary_note: bool,
    max_bytes_per_file: int,
    max_total_bytes: int
) -> None:
    """Write a tree listing plus the contents of *files* to *out_path*.

    The output starts with a "Directory structure" section built from the
    display paths of all files, followed by one section per file: a
    ``---``-delimited header with the display path, then the file's text
    (decoded as UTF-8 with replacement of invalid bytes), then a closing
    ``---``.

    Args:
        root: Directory that display paths are made relative to.
        files: Files to bundle, written in the given order.
        out_path: Destination file; opened for writing as UTF-8 with "\\n"
            line endings.
        include_binary_note: Selects which of the two skip messages is
            written for files detected as binary.
        max_bytes_per_file: Per-file cap; longer files are cut and marked
            with ``[truncated]``.
        max_total_bytes: Overall cap on bytes written from file contents.
            Bundling stops at the first file that would exceed it.
    """
    total_read = 0

    # Prepare rel paths for tree listing
    rels = [rel_for_display(root, f) for f in files]
    rels_sorted = sorted(rels, key=str.lower)

    with out_path.open("w", encoding="utf-8", newline="\n") as out:
        out.write("Directory structure\n")
        out.write(build_tree_listing(rels_sorted))
        out.write("\n\n")

        for f in files:
            display_path = rel_for_display(root, f)
            out.write("---\n")
            out.write(f"{display_path}\n")
            out.write("---\n")

            if is_binary_file(f):
                if include_binary_note:
                    out.write("[binary file skipped: contains NUL bytes]\n")
                else:
                    out.write("[skipped: binary file]\n")
                out.write("---\n\n")
                continue

            try:
                # Read one byte past the cap: enough to detect truncation
                # without loading an arbitrarily large file into memory.
                with f.open("rb") as src:
                    data = src.read(max_bytes_per_file + 1)
            except OSError as e:
                out.write(f"[skipped: read error: {e}]\n")
                out.write("---\n\n")
                continue

            truncated = len(data) > max_bytes_per_file
            if truncated:
                data = data[:max_bytes_per_file]

            if total_read + len(data) > max_total_bytes:
                out.write("[stopped: max total bytes reached]\n")
                out.write("---\n\n")
                break
            total_read += len(data)

            text = data.decode("utf-8", errors="replace")
            out.write(text)
            if truncated:
                # The marker already ends with a newline, so no extra one
                # is needed before the closing delimiter.
                out.write("\n\n[truncated]\n")
            elif not text.endswith("\n"):
                out.write("\n")
            out.write("---\n\n")
def main():
    """Run the interactive bundler loop.

    Parses the command line, validates the project root, then repeatedly
    reads a batch of pasted paths, resolves them against the root, and writes
    the bundle to the main output file in the current working directory.
    Unless ``--no-archive`` is given, each bundle is also saved under the
    next free ``prompt_NNNN.txt`` name. The loop ends when the path reader
    returns the ``["__QUIT__"]`` sentinel.

    Raises:
        SystemExit: If the given root is not an existing directory.
    """
    import shutil  # local import: only needed here, for the archive copy

    ap = argparse.ArgumentParser(
        description="Interactive bundler: run with root, then paste lists of file paths; outputs prompt.txt each time."
    )
    ap.add_argument("root", help="Project root directory")
    ap.add_argument("--output", default="prompt.txt", help="Main output filename (default: prompt.txt)")
    ap.add_argument("--no-archive", action="store_true", help="Disable writing prompt_0001.txt, prompt_0002.txt, ...")
    ap.add_argument("--include-binary-note", action="store_true", help="Write a note for binary files instead of skipping silently")
    ap.add_argument("--max-bytes-per-file", type=int, default=2_000_000, help="Max bytes to read per file (default: 2,000,000)")
    ap.add_argument("--max-total-bytes", type=int, default=30_000_000, help="Max total bytes across all files (default: 30,000,000)")
    ap.add_argument("--exclude-dir", action="append", default=[], help="Dir name to exclude if user pastes dir paths (rare). Can repeat.")
    args = ap.parse_args()

    root = Path(args.root).expanduser().resolve()
    if not root.is_dir():
        raise SystemExit(f"Root path is not a directory: {root}")

    base_dir = Path.cwd()
    main_out = (base_dir / args.output).resolve()

    print(f"Root: {root}")
    print(f"Will write main output to: {main_out}")
    if not args.no_archive:
        print("Will also write archived outputs: prompt_0001.txt, prompt_0002.txt, ...")
    print("")

    exclude_dirs = set(DEFAULT_EXCLUDE_DIRS)
    exclude_dirs.update(args.exclude_dir)

    def report_skipped(entries, limit=30):
        # Print at most `limit` skipped inputs, then a count of the rest.
        if not entries:
            return
        print("Skipped:")
        for raw, reason in entries[:limit]:
            print(f" - {raw} ({reason})")
        if len(entries) > limit:
            print(f" ... and {len(entries) - limit} more")

    while True:
        raw_paths = read_paths_from_stdin()
        if raw_paths == ["__QUIT__"]:
            print("Bye.")
            break
        if not raw_paths:
            print("No paths provided. Paste again (or type quit).")
            continue

        resolved_files = []
        skipped = []
        for raw in raw_paths:
            p = resolve_input_path(root, raw)
            # Directories are never bundled; only the reported reason differs.
            if p.is_dir():
                if p.name in exclude_dirs:
                    skipped.append((raw, "excluded directory"))
                else:
                    skipped.append((raw, "is a directory (paste files, not dirs)"))
                continue
            if not p.exists():
                skipped.append((raw, "not found"))
                continue
            if not p.is_file():
                skipped.append((raw, "not a file"))
                continue
            resolved_files.append(p)

        # resolve_input_path already returns resolved paths, so equal paths
        # are duplicates; dict.fromkeys de-dupes while preserving order.
        unique_files = list(dict.fromkeys(resolved_files))

        if not unique_files:
            print("No valid files to bundle.")
            report_skipped(skipped)
            continue

        # Write outputs
        write_bundle(
            root=root,
            files=unique_files,
            out_path=main_out,
            include_binary_note=args.include_binary_note,
            max_bytes_per_file=args.max_bytes_per_file,
            max_total_bytes=args.max_total_bytes
        )

        archive_out = None
        if not args.no_archive:
            archive_out = next_archive_name(base_dir=base_dir)
            # Copy the bundle just written rather than re-reading every input:
            # the archive is then guaranteed to match the main output.
            shutil.copyfile(main_out, archive_out)

        print(f"\nBundled {len(unique_files)} file(s).")
        print(f"Main output: {main_out}")
        if archive_out:
            print(f"Archive: {archive_out}")
        report_skipped(skipped)
        print("\nPaste another list, or type quit.\n")
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment