Last active
January 10, 2026 19:55
-
-
Save victorespigares/9c63aedecb9293934950f5b9c7c0c484 to your computer and use it in GitHub Desktop.
Convert nvALT RTF files into FSNotes Markdown via plain text. Requires textutil (macOS). Preserves file timestamps and adds the [tags] found in each note's filename as front matter.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Convert nvALT RTF notes to Markdown using textutil. | |
| - Uses textutil for clean, fast conversion | |
| - Extracts tags from filename [tag], converts to camelCase | |
| - Escapes hashtags to prevent FSNotes tag interpretation | |
| - Preserves file timestamps and original filename | |
| - Only adds front matter if there are tags | |
| """ | |
| import sys | |
| import subprocess | |
| import re | |
| from pathlib import Path | |
| from concurrent.futures import ProcessPoolExecutor, as_completed | |
| import os | |
def extract_tags_from_filename(filename: str) -> list:
    """
    Extract ``[tag]`` patterns from a filename and convert them to camelCase.

    Each bracketed block may hold several comma-separated tags. Multi-word
    tags are joined in camelCase; single-word tags are lower-cased.

        [metodo arde]            -> ['metodoArde']
        [metodo arde,formacion]  -> ['metodoArde', 'formacion']

    Args:
        filename: File name (or stem) to scan for bracketed tag blocks.

    Returns:
        The tags as strings, in order of appearance. Blank entries
        (``[a,,b]``, ``[a, ]``, ``[ ]``) are skipped so that no empty tag
        is ever returned.
    """
    tags = []
    for block in re.findall(r'\[([^\]]+)\]', filename):
        for part in block.split(','):
            words = part.split()
            if not words:
                # A blank entry would otherwise become an empty-string tag.
                continue
            first, *rest = words
            tags.append(first.lower() + ''.join(w.capitalize() for w in rest))
    return tags
def escape_hashtags(content: str) -> str:
    """
    Escape hashtags so FSNotes does not interpret them as tags.

    A ``#`` immediately followed by a letter or underscore gets a space
    inserted after it (``#word`` -> ``# word``). The letter test is
    Unicode-aware, so accented hashtags such as ``#árbol`` are escaped too.
    A ``#`` followed by a digit (``#123``) is left alone.

    Lines that start with ``# `` or ``##`` are treated as Markdown headers
    and copied through unchanged, including any hashtags they contain.

    Args:
        content: Full text of the note.

    Returns:
        The text with hashtags escaped, line structure preserved.
    """
    escaped_lines = []
    for line in content.split('\n'):
        if line.startswith(('# ', '##')):
            # Markdown header: keep verbatim.
            escaped_lines.append(line)
        else:
            # [^\W\d] == "word character that is not a digit", i.e. any
            # Unicode letter or underscore. The lookahead keeps the letter
            # itself out of the match so only the space is inserted.
            escaped_lines.append(re.sub(r'#(?=[^\W\d])', '# ', line))
    return '\n'.join(escaped_lines)
def convert_single_rtf(rtf_file_str: str, output_dir_str: str) -> tuple:
    """Convert a single RTF file to Markdown using textutil.

    The RTF is converted to UTF-8 plain text by the ``textutil`` command,
    written to ``<output_dir>/<original stem>.md``, cleaned up, optionally
    prefixed with a front-matter block listing the tags found in the
    filename, and finally given the modification time of the source file.

    Paths are taken as strings because this function is submitted to a
    process pool by the batch converter.

    Args:
        rtf_file_str: Path of the source ``.rtf`` file.
        output_dir_str: Directory in which the ``.md`` file is written.

    Returns:
        A ``(filename, success, error)`` tuple. ``filename`` is the source
        file's name (``"unknown"`` if the path could not even be parsed),
        ``success`` is a bool and ``error`` is ``None`` on success or a
        message string on failure. Exceptions are never propagated.
    """
    # Tracked explicitly so the error path can always report a name.
    name = "unknown"
    try:
        rtf_file = Path(rtf_file_str)
        name = rtf_file.name
        output_dir = Path(output_dir_str)

        # Remember the source timestamp before anything is written.
        mtime = os.path.getmtime(rtf_file)

        file_tags = extract_tags_from_filename(rtf_file.stem)

        # Keep the original filename (tags included), only swap the suffix.
        md_file = output_dir / (rtf_file.stem + ".md")

        # textutil writes the plain-text conversion straight to md_file.
        result = subprocess.run(
            [
                'textutil',
                '-convert', 'txt',
                '-encoding', 'UTF-8',
                '-output', str(md_file),
                str(rtf_file),
            ],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            return (name, False, f"textutil failed: {result.stderr.strip()}")

        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        content = content.strip()
        # Collapse any run of blank lines into a single blank line.
        content = re.sub(r'\n{3,}', '\n\n', content)
        # Neutralise in-text hashtags so they are not picked up as tags.
        content = escape_hashtags(content)

        if file_tags:
            # Front matter followed by one blank line, then the note body.
            tags_str = ' '.join(f'#{tag}' for tag in file_tags)
            final_content = f"---\ntags: {tags_str}\n---\n\n{content}"
        else:
            # No tags, no front matter.
            final_content = content

        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(final_content)

        # Rewriting the file reset its timestamp; restore the original one.
        os.utime(md_file, (mtime, mtime))
        return (name, True, None)
    except Exception as e:
        # Worker boundary: report the failure instead of raising so one bad
        # file cannot take down the whole batch.
        return (name, False, str(e))
def convert_rtf_to_markdown(input_dir: str, output_dir: str, max_workers: int = 4):
    """Batch convert all .rtf files to Markdown with parallel processing.

    Every ``*.rtf`` file directly inside ``input_dir`` (no recursion) is
    handed to ``convert_single_rtf`` in a process pool. Progress is printed
    per file, followed by a summary and the names of up to ten failures.

    Args:
        input_dir: Directory holding the ``.rtf`` notes; ``~`` is expanded.
        output_dir: Directory for the ``.md`` files; ``~`` is expanded and
            the directory is created if it does not exist.
        max_workers: Number of worker processes.

    Returns:
        None. Results are reported on standard output only.
    """
    input_path = Path(input_dir).expanduser()
    output_path = Path(output_dir).expanduser()
    output_path.mkdir(parents=True, exist_ok=True)

    rtf_files = list(input_path.glob("*.rtf"))
    if not rtf_files:
        print(f"β No .rtf files found in {input_path}")
        return

    total = len(rtf_files)
    print(f"π Found {total} .rtf files to convert")
    print(f"π Input: {input_path}")
    print(f"π Output: {output_path}")
    print(f"β‘ Using {max_workers} parallel workers\n")

    success_count = 0
    errors = []
    output_dir_str = str(output_path)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its source file so a failure that happens
        # outside the worker's own error handling can still be attributed.
        futures = {
            executor.submit(convert_single_rtf, str(rtf_file), output_dir_str): rtf_file
            for rtf_file in rtf_files
        }
        for idx, future in enumerate(as_completed(futures), 1):
            try:
                filename, success, error = future.result()
            except Exception as exc:
                # E.g. the worker process died; record it and keep going so
                # the remaining results and the summary are still reported.
                filename = futures[future].name
                success = False
                error = str(exc) or type(exc).__name__

            if success:
                print(f"β [{idx:4d}/{total}] {filename}")
                success_count += 1
            else:
                print(f"β [{idx:4d}/{total}] {filename} - Error: {error}")
                errors.append((filename, error))

    print(f"\n{'='*70}")
    print("π Conversion complete!")
    print(f" β Success: {success_count}")
    print(f" β Errors: {len(errors)}")
    print(f" π Output: {output_path}")
    print(f"{'='*70}")

    if errors:
        print("\nβ οΈ Failed conversions (first 10):")
        for filename, _error in errors[:10]:
            print(f" β’ {filename}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")
def main():
    """Script entry point: run the batch conversion.

    Command line: ``[input_dir [output_dir]]``. Whichever positional
    argument is missing falls back to the nvALT "Notational Data" folder
    (input) or its ``md-out`` subfolder (output). Extra arguments are
    ignored.
    """
    fallbacks = (
        "~/Library/Application Support/Notational Data",
        "~/Library/Application Support/Notational Data/md-out",
    )
    given = sys.argv[1:3]
    # Positional arguments first, then the fallbacks for whatever is missing.
    input_dir, output_dir = (*given, *fallbacks[len(given):])
    convert_rtf_to_markdown(input_dir, output_dir)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment