Skip to content

Instantly share code, notes, and snippets.

@AlejandroAkbal
Created August 28, 2025 02:23
Show Gist options
  • Select an option

  • Save AlejandroAkbal/e5961b967385f869c49f80438a5c18d1 to your computer and use it in GitHub Desktop.

Select an option

Save AlejandroAkbal/e5961b967385f869c49f80438a5c18d1 to your computer and use it in GitHub Desktop.
Python 3 tool, made with AI, that transforms a Wallabag export into an Instapaper CSV import
#!/usr/bin/env python3
"""
Wallabag to Instapaper CSV Converter
Converts Wallabag CSV export to Instapaper import format
"""
import csv
import sys
import html
import re
from datetime import datetime
from pathlib import Path
# Raise the csv module's per-field size cap to 10 million characters.
# NOTE(review): presumably needed because the 'Content' column read below holds
# whole-article HTML, which can exceed the module's default cap; confirm.
csv.field_size_limit(10**7)
def clean_html_content(content, max_length=500):
    """Extract a plain-text preview from HTML content for the Selection field.

    Markup is removed *before* HTML entities are decoded. Decoding first would
    turn escaped text such as ``&lt;div&gt;`` into ``<div>``, which the tag
    pattern would then delete as if it were real markup.

    Args:
        content: HTML string. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters kept before ``"..."`` is
            appended. ``None`` disables truncation. Defaults to 500.

    Returns:
        The text with tags removed, entities decoded and whitespace runs
        collapsed to single spaces; truncated to ``max_length`` characters
        plus ``"..."`` when it was longer.
    """
    if not content:
        return ""

    # Replace every tag with a space so text from adjacent elements
    # (e.g. "</p><p>") does not get glued together.
    text = re.sub(r'<[^>]+>', ' ', content)

    # Decode entities only once the real tags are gone, so escaped angle
    # brackets survive as literal characters.
    text = html.unescape(text)

    # Collapse whitespace runs (this also covers non-breaking spaces produced
    # by decoding &nbsp;) and trim the ends.
    text = re.sub(r'\s+', ' ', text).strip()

    if max_length is not None and len(text) > max_length:
        text = text[:max_length] + "..."
    return text
def convert_date_to_timestamp(date_str):
    """Convert a Wallabag date string to a Unix timestamp string.

    The expected input format is ``DD/MM/YYYY HH:MM:SS`` (for example
    ``"05/08/2025 08:12:42"``). Leading and trailing whitespace is ignored so
    that a padded CSV field does not silently lose its timestamp.

    The parsed datetime is naive, so ``datetime.timestamp()`` interprets it in
    the local timezone of the machine running the script.

    Args:
        date_str: Date string from the export; ``None`` or empty is allowed.

    Returns:
        The integer Unix timestamp as a string, or ``""`` when the input is
        empty or does not match the expected format.
    """
    if not date_str:
        return ""
    try:
        # Only the parse can raise ValueError, so keep the try body minimal.
        parsed = datetime.strptime(date_str.strip(), "%d/%m/%Y %H:%M:%S")
    except ValueError:
        # Unparseable date: emit an empty timestamp rather than failing the row.
        return ""
    return str(int(parsed.timestamp()))
def clean_title(title):
    """Return the title as plain text.

    HTML tags are removed first and entities are decoded afterwards. Doing it
    the other way round would turn escaped text such as ``&lt;canvas&gt;``
    into ``<canvas>`` and then delete it as if it were markup.

    Args:
        title: Raw title string; ``None`` or empty yields ``""``.

    Returns:
        The title without tags, with entities decoded and surrounding
        whitespace trimmed.
    """
    if not title:
        return ""

    # Strip real markup while entities are still encoded.
    without_tags = re.sub(r'<[^>]+>', '', title)

    # Now it is safe to decode entities, including escaped angle brackets.
    return html.unescape(without_tags).strip()
def convert_wallabag_to_instapaper(input_file, output_file):
    """Convert a Wallabag CSV export into an Instapaper import CSV.

    The input is read as a semicolon-delimited CSV with a header row. Rows
    lacking a ``URL`` or ``Title`` value are skipped. Every remaining row is
    written to a comma-delimited output with the columns ``URL``, ``Title``,
    ``Selection``, ``Folder``, ``Timestamp`` and ``Tags``; ``Folder`` is always
    ``Unread`` and ``Tags`` is always the literal string ``[]``.

    Progress and error messages are printed to standard output.

    Args:
        input_file: Path of the Wallabag CSV export.
        output_file: Path of the CSV file to create (overwritten if present).

    Returns:
        ``True`` when the output file was written, ``False`` when the input
        file does not exist or any error occurred during conversion.
    """
    if not Path(input_file).exists():
        print(f"Error: Input file '{input_file}' not found.")
        return False

    fieldnames = ['URL', 'Title', 'Selection', 'Folder', 'Timestamp', 'Tags']

    try:
        # 'utf-8-sig' drops a leading byte-order mark if one is present;
        # otherwise the BOM becomes part of the first header name, that
        # column can no longer be looked up, and every row is skipped.
        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields containing embedded newlines.
        with open(input_file, 'r', encoding='utf-8-sig', newline='') as infile:
            # Wallabag exports use a semicolon as the field delimiter.
            reader = csv.DictReader(infile, delimiter=';')

            output_rows = []
            for row in reader:
                # Skip blank or incomplete rows.
                if not row.get('URL') or not row.get('Title'):
                    continue

                output_rows.append({
                    'URL': row.get('URL', '').strip(),
                    'Title': clean_title(row.get('Title', '')),
                    'Selection': clean_html_content(row.get('Content', '')),
                    'Folder': 'Unread',  # Default folder
                    'Timestamp': convert_date_to_timestamp(row.get('Creation date', '')),
                    'Tags': '[]',  # Empty tags array as string
                })

        with open(output_file, 'w', encoding='utf-8', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(output_rows)

        print(f"✅ Successfully converted {len(output_rows)} entries")
        print(f"📁 Output saved to: {output_file}")
        return True

    except Exception as e:
        # Top-level boundary for the conversion: callers only look at the
        # boolean result, so report the problem and signal failure.
        print(f"❌ Error during conversion: {e}")
        return False
def main():
    """Command-line entry point.

    Expects exactly two arguments: the Wallabag CSV export to read and the
    Instapaper CSV file to write. Prints usage and exits with status 1 when
    the argument count is wrong; exits with status 1 when the conversion
    reports failure.
    """
    if len(sys.argv) != 3:
        print("Usage: python3 wallabag_to_instapaper.py <input_wallabag.csv> <output_instapaper.csv>")
        print("")
        print("Example:")
        print(" python3 wallabag_to_instapaper.py Wallabag.csv Instapaper_Import.csv")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    print("🔄 Converting Wallabag CSV to Instapaper format...")
    print(f"📂 Input: {input_file}")
    print(f"📁 Output: {output_file}")
    print()

    # The converter reports its own errors; only the exit status is set here.
    if not convert_wallabag_to_instapaper(input_file, output_file):
        sys.exit(1)

    print()
    print("🎉 Conversion completed successfully!")
    print("📋 You can now import the output file into Instapaper.")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment