Created
August 28, 2025 02:23
-
-
Save AlejandroAkbal/e5961b967385f869c49f80438a5c18d1 to your computer and use it in GitHub Desktop.
Python 3 tool, made with AI, that transforms a Wallabag CSV export into an Instapaper CSV import
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Wallabag to Instapaper CSV Converter | |
| Converts Wallabag CSV export to Instapaper import format | |
| """ | |
| import csv | |
| import sys | |
| import html | |
| import re | |
| from datetime import datetime | |
| from pathlib import Path | |
# The "Content" column carries a whole article body in a single CSV cell, so
# raise the csv module's per-field size cap to 10 million characters.
csv.field_size_limit(10_000_000)
def clean_html_content(content, max_length=500) -> str:
    """Extract a short plain-text preview from HTML for the Selection field.

    Args:
        content: HTML markup (may be empty or None).
        max_length: Maximum number of characters kept before the text is
            truncated and "..." is appended. Defaults to 500.

    Returns:
        The text with tags removed, HTML entities decoded and whitespace
        collapsed; an empty string when ``content`` is falsy.
    """
    if not content:
        return ""

    # Strip markup BEFORE decoding entities. Decoding first would turn
    # escaped text such as "1 &lt; 2 and 3 &gt; 1" into "1 < 2 and 3 > 1",
    # and the tag pattern would then delete "< 2 and 3 >" as if it were a tag.
    # Tags become a space so adjacent words do not run together.
    text = re.sub(r'<[^>]+>', ' ', content)

    # Now that only text is left, decode entities (&amp;, &nbsp;, ...).
    text = html.unescape(text)

    # Collapse every run of whitespace (including non-breaking spaces
    # produced by &nbsp;) into a single space.
    text = re.sub(r'\s+', ' ', text).strip()

    # Keep only a preview-sized prefix.
    if len(text) > max_length:
        text = text[:max_length] + "..."
    return text
def convert_date_to_timestamp(date_str) -> str:
    """Convert a Wallabag date string to a Unix timestamp string.

    Args:
        date_str: Date in Wallabag's export format "DD/MM/YYYY HH:MM:SS",
            e.g. "05/08/2025 08:12:42". Surrounding whitespace is ignored.

    Returns:
        The timestamp in whole seconds as a string, or an empty string when
        the input is empty, cannot be parsed, or cannot be represented as a
        timestamp on this platform.

    Note:
        The parsed value carries no timezone, so ``datetime.timestamp()``
        interprets it in the local timezone of the machine running the script.
    """
    if not date_str:
        return ""
    try:
        # strptime rejects leading/trailing whitespace, so trim it first
        # instead of silently losing the timestamp.
        parsed = datetime.strptime(date_str.strip(), "%d/%m/%Y %H:%M:%S")
        return str(int(parsed.timestamp()))
    except (ValueError, OverflowError, OSError):
        # ValueError: text does not match the format.
        # OverflowError / OSError: date outside the platform's supported range.
        return ""
def clean_title(title) -> str:
    """Return the title as plain text.

    Args:
        title: Raw title, possibly containing HTML tags and entities
            (may be empty or None).

    Returns:
        The title with tags removed, entities decoded and surrounding
        whitespace trimmed; an empty string when ``title`` is falsy.
    """
    if not title:
        return ""

    # Remove tags first, then decode entities. The reverse order would turn
    # an escaped "&lt;div&gt;" into "<div>" and then delete it as a tag,
    # dropping text that belongs to the title.
    without_tags = re.sub(r'<[^>]+>', '', title)
    return html.unescape(without_tags).strip()
def convert_wallabag_to_instapaper(input_file, output_file):
    """Convert a Wallabag CSV export into an Instapaper CSV import file.

    Reads the semicolon-delimited ``input_file``, keeps every row that has
    both a URL and a Title, and writes a comma-delimited ``output_file`` with
    the columns URL, Title, Selection, Folder, Timestamp and Tags.

    Args:
        input_file: Path of the Wallabag CSV export.
        output_file: Path of the Instapaper CSV file to create or overwrite.

    Returns:
        True when the output file was written, False when the input file is
        missing, lacks the required columns, or any error occurs during the
        conversion. Problems are reported on stdout rather than raised.
    """
    if not Path(input_file).exists():
        print(f"Error: Input file '{input_file}' not found.")
        return False

    fieldnames = ['URL', 'Title', 'Selection', 'Folder', 'Timestamp', 'Tags']

    try:
        output_rows = []

        # "utf-8-sig" drops a leading byte-order mark if present; with plain
        # "utf-8" the BOM would stick to the first header name and every row
        # would then be skipped as "empty". newline='' lets the csv module do
        # its own newline handling, as its documentation requires.
        with open(input_file, 'r', encoding='utf-8-sig', newline='') as infile:
            # Wallabag exports use a semicolon delimiter.
            reader = csv.DictReader(infile, delimiter=';')

            # Fail loudly on a wrong delimiter or a non-Wallabag file instead
            # of reporting a "successful" conversion of zero entries.
            header = reader.fieldnames or []
            missing = [name for name in ('URL', 'Title') if name not in header]
            if missing:
                missing_list = ", ".join(missing)
                print(f"Error: Input file '{input_file}' is missing required column(s): {missing_list}.")
                return False

            for row in reader:
                # Short rows yield None for absent cells, hence the "or ''".
                url = (row.get('URL') or '').strip()
                title = row.get('Title')

                # Skip rows without a usable URL or title.
                if not url or not title:
                    continue

                output_rows.append({
                    'URL': url,
                    'Title': clean_title(title),
                    'Selection': clean_html_content(row.get('Content', '')),
                    'Folder': 'Unread',  # Default folder
                    'Timestamp': convert_date_to_timestamp(row.get('Creation date', '')),
                    # NOTE: Wallabag tags are not carried over; every entry
                    # gets an empty tags array (as a string).
                    'Tags': '[]',
                })

        # Write the Instapaper CSV.
        with open(output_file, 'w', encoding='utf-8', newline='') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(output_rows)

        print(f"β Successfully converted {len(output_rows)} entries")
        print(f"π Output saved to: {output_file}")
        return True

    except Exception as e:
        # Boundary of the conversion: report any failure and signal it through
        # the return value so the caller can choose the exit status.
        print(f"β Error during conversion: {e}")
        return False
def main():
    """Command-line entry point.

    Expects exactly two arguments (input path, output path). Prints usage and
    exits with status 1 otherwise; also exits with status 1 when the
    conversion reports failure.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        usage_lines = (
            "Usage: python3 wallabag_to_instapaper.py <input_wallabag.csv> <output_instapaper.csv>",
            "",
            "Example:",
            " python3 wallabag_to_instapaper.py Wallabag.csv Instapaper_Import.csv",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    input_file, output_file = cli_args

    print("π Converting Wallabag CSV to Instapaper format...")
    print(f"π Input: {input_file}")
    print(f"π Output: {output_file}")
    print()

    # Guard clause: a failed conversion ends the run with a non-zero status.
    if not convert_wallabag_to_instapaper(input_file, output_file):
        sys.exit(1)

    print()
    print("π Conversion completed successfully!")
    print("π You can now import the output file into Instapaper.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment