Skip to content

Instantly share code, notes, and snippets.

@victorespigares
Last active January 10, 2026 19:55
Show Gist options
  • Select an option

  • Save victorespigares/9c63aedecb9293934950f5b9c7c0c484 to your computer and use it in GitHub Desktop.

Select an option

Save victorespigares/9c63aedecb9293934950f5b9c7c0c484 to your computer and use it in GitHub Desktop.
Convert nvALT RTF files into FSNotes Markdown via plain text. Needs textutil (macOS). Preserves timestamps and adds the tags found in each note's title to the front matter.
#!/usr/bin/env python3
"""
Convert nvALT RTF notes to Markdown using textutil.
- Uses textutil for clean, fast conversion
- Extracts tags from filename [tag], converts to camelCase
- Escapes hashtags to prevent FSNotes tag interpretation
- Preserves file timestamps and original filename
- Only adds front matter if there are tags
"""
import sys
import subprocess
import re
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
def extract_tags_from_filename(filename: str) -> list:
    """
    Extract [tag] patterns from filename and convert to camelCase.

    Each ``[...]`` block may contain several comma-separated tags. A tag made
    of several words is joined in camelCase; a single-word tag is lowercased.
    Empty entries (``[a,,b]`` or ``[ ]``) are skipped so that no blank tag is
    ever returned.

    [metodo arde] -> metodoArde
    [metodo arde,formacion] -> metodoArde, formacion

    Args:
        filename: File name (or stem) to scan for bracketed tag blocks.

    Returns:
        The tags as strings, in order of appearance; an empty list when the
        name contains no tag block.
    """
    tags = []
    for block in re.findall(r'\[([^\]]+)\]', filename):
        for part in block.split(','):
            # split() with no argument also trims surrounding whitespace.
            words = part.split()
            if not words:
                # Blank entry: appending it would yield an empty tag.
                continue
            head, *rest = words
            tags.append(head.lower() + ''.join(w.capitalize() for w in rest))
    return tags
# A '#' immediately followed by a letter or underscore. ``[^\W\d]`` is
# Unicode-aware for str patterns, so accented and non-Latin letters match too.
_HASHTAG_START = re.compile(r'#(?=[^\W\d])')


def escape_hashtags(content: str) -> str:
    """
    Escape hashtags followed by word characters to prevent FSNotes tag interpretation.

    Converts #word to # word by inserting a space after the '#'. The word may
    start with any letter (including non-ASCII ones such as ``#época``) or an
    underscore; a '#' followed by a digit is left alone.

    Lines that start with '# ' or with '##' are treated as markdown headers
    and copied through unchanged, including any hashtags they contain.

    Args:
        content: Text of the note, with '\\n' line separators.

    Returns:
        The text with hashtags escaped on every non-header line.
    """
    escaped_lines = []
    for line in content.split('\n'):
        if line.startswith(('# ', '##')):
            # Header line: keep verbatim.
            escaped_lines.append(line)
        else:
            escaped_lines.append(_HASHTAG_START.sub('# ', line))
    return '\n'.join(escaped_lines)
def convert_single_rtf(rtf_file_str: str, output_dir_str: str) -> tuple:
    """
    Convert one RTF note into a Markdown file by running textutil.

    The output keeps the source file's stem (bracketed tags included) with a
    ``.md`` extension. textutil writes plain text straight into that file; the
    text is then trimmed, runs of blank lines are collapsed, hashtags are
    escaped, and a front matter block is prepended when the file name carries
    tags. Finally the source's modification time is copied onto the output.

    Args:
        rtf_file_str: Path of the RTF file to convert.
        output_dir_str: Directory that receives the ``.md`` file.

    Returns:
        ``(file name, success flag, error message or None)``. Any exception is
        caught and reported through the tuple instead of being raised.
    """
    # Reported as the file name if the source path itself cannot be built.
    source_name = "unknown"
    try:
        source = Path(rtf_file_str)
        source_name = source.name
        target_dir = Path(output_dir_str)

        # Remember the modification time so it can be restored on the output.
        original_mtime = os.path.getmtime(source)

        # Tags come from the [..] blocks in the file name.
        tags = extract_tags_from_filename(source.stem)

        # Same name as the source (tags and all), only the extension changes.
        target = target_dir / (source.stem + ".md")

        # textutil converts the RTF to UTF-8 plain text directly into target.
        command = [
            'textutil',
            '-convert', 'txt',
            '-encoding', 'UTF-8',
            '-output', str(target),
            str(source),
        ]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if completed.returncode != 0:
            return (source.name, False, f"textutil failed: {completed.stderr}")

        with open(target, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # Trim, then squeeze three or more newlines down to a single blank line.
        text = re.sub(r'\n{3,}', '\n\n', text.strip())
        # Keep FSNotes from reading '#word' in the body as a tag.
        text = escape_hashtags(text)

        # Front matter is only emitted when there is at least one tag.
        if tags:
            tag_line = ' '.join(f'#{tag}' for tag in tags)
            text = f'---\ntags: {tag_line}\n---\n\n' + text

        with open(target, 'w', encoding='utf-8') as handle:
            handle.write(text)

        # Rewriting the file updated its times; put the original one back.
        os.utime(target, (original_mtime, original_mtime))
        return (source.name, True, None)
    except Exception as exc:
        return (source_name, False, str(exc))
def convert_rtf_to_markdown(input_dir: str, output_dir: str, max_workers: int = 4):
    """
    Batch convert all .rtf files to Markdown with parallel processing.

    Every ``*.rtf`` file directly inside ``input_dir`` (no recursion) is handed
    to ``convert_single_rtf`` in a process pool. Progress is printed per file
    as results arrive, followed by a summary and a list of up to ten failures.

    Args:
        input_dir: Directory holding the RTF notes; ``~`` is expanded.
        output_dir: Directory for the Markdown files; ``~`` is expanded and
            the directory is created if missing.
        max_workers: Number of worker processes.

    Returns:
        None. Results are reported on standard output only.
    """
    input_path = Path(input_dir).expanduser()
    output_path = Path(output_dir).expanduser()
    output_path.mkdir(parents=True, exist_ok=True)

    rtf_files = list(input_path.glob("*.rtf"))
    if not rtf_files:
        print(f"❌ No .rtf files found in {input_path}")
        return

    total = len(rtf_files)
    print(f"🔄 Found {total} .rtf files to convert")
    print(f"📁 Input: {input_path}")
    print(f"📁 Output: {output_path}")
    print(f"⚡ Using {max_workers} parallel workers\n")

    success_count = 0
    errors = []
    output_dir_str = str(output_path)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Map each future to its file name so a failure can still be attributed
        # to a file when the worker itself dies and returns nothing.
        futures = {
            executor.submit(convert_single_rtf, str(rtf_file), output_dir_str): rtf_file.name
            for rtf_file in rtf_files
        }
        for idx, future in enumerate(as_completed(futures), 1):
            try:
                filename, success, error = future.result()
            except Exception as exc:
                # e.g. BrokenProcessPool: record it and keep going so the
                # remaining results and the summary are still reported.
                filename, success, error = futures[future], False, str(exc)

            if success:
                print(f"✅ [{idx:4d}/{total}] {filename}")
                success_count += 1
            else:
                print(f"❌ [{idx:4d}/{total}] {filename} - Error: {error}")
                errors.append((filename, error))

    print(f"\n{'='*70}")
    print("📊 Conversion complete!")
    print(f" ✅ Success: {success_count}")
    print(f" ❌ Errors: {len(errors)}")
    print(f" 📁 Output: {output_path}")
    print(f"{'='*70}")

    if errors:
        print("\n⚠️ Failed conversions (first 10):")
        for filename, error in errors[:10]:
            print(f" • {filename}: {error}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")
def main():
    """
    Command-line entry point.

    Usage: script [input_dir [output_dir]]. Any directory not given on the
    command line falls back to the nvALT data folder and its ``md-out``
    subfolder respectively.
    """
    fallbacks = (
        "~/Library/Application Support/Notational Data",
        "~/Library/Application Support/Notational Data/md-out",
    )
    # At most two positional arguments are used; the rest come from fallbacks.
    given = sys.argv[1:3]
    source_dir, target_dir = (*given, *fallbacks[len(given):])
    convert_rtf_to_markdown(source_dir, target_dir)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment