CLI tool designed to parse, filter, and clean up your public interactions from the CSV export provided by LinkedIn
#!/usr/bin/env python3
# linkedin_export_parser_sweeper.py
# Version 0.2 – 2025-12-02
#
# CLI tool designed to parse, filter, and clean up your public LinkedIn
# interactions (comments, reactions, shares) based on the official CSV data
# export. Provides interactive triage, persistent SQLite-based whitelist/
# blacklist, and semi-automated comment removal through Safari integration.
#
# (c) 2025 dzogrim
# License: MIT
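#
# Example invocations (illustrative only; the export path is a placeholder,
# the flags match the argparse options defined below):
#   python3 linkedin_export_parser_sweeper.py ~/Downloads/Complete_LinkedInDataExport_.../
#   python3 linkedin_export_parser_sweeper.py <export_dir> --older-than 365 --export-urls old_urls.txt
#   python3 linkedin_export_parser_sweeper.py <export_dir> --run-mode safari-comments --limit 20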
import argparse
import csv
import sqlite3
import subprocess
import sys
from pathlib import Path
from datetime import datetime, timedelta

# May want to add Votes.csv and Rich_Media.csv
CSV_FILES = {
    "comments": ["Comments.csv"],
    "reactions": ["Reactions.csv"],
    "shares": ["Shares.csv", "InstantReposts.csv"],
}


# ---------- CSV helpers ----------

def pick_first(row, keys):
    for k in keys:
        if k in row and row[k]:
            return row[k]
    return None

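# LinkedIn has used several different column names for the post URL across
# export versions; try the known candidates first, then fall back to any
# column whose name contains "url".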
def guess_url(row, rtype):
    url = pick_first(
        row,
        [
            "Permalink",
            "URL",
            "Url",
            "Link",
            "Share URL",
            "Share Url",
            "shareUrl",
            "Original URL",
            "Original Url",
        ],
    )
    if url:
        return url
    for k, v in row.items():
        if v and "url" in k.lower():
            return v
    return None

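# Same approach for the textual content: known column names first, then any
# column whose name looks like it holds text (title, headline, commentary…).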
def guess_text(row, rtype):
    text = pick_first(
        row,
        [
            "Comment",
            "Comment Text",
            "CommentBody",
            "Message",
            "Text",
            "Share Text",
            "Body",
            "Title",
            "Headline",
            "Commentary",
            "Share Commentary",
        ],
    )
    if text:
        return text
    for k, v in row.items():
        kl = k.lower()
        if v and any(
            token in kl
            for token in ["text", "title", "headline", "comment", "summary", "commentary"]
        ):
            return v
    return None

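# Reduce a raw CSV row to the handful of fields the tool works with
# (type, timestamp, URL, text); long text is truncated for display.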
def summarize_row(row, rtype):
    time = pick_first(
        row,
        ["Created At", "CreatedAt", "createdAt", "Date", "Timestamp"],
    )
    url = guess_url(row, rtype)
    text = guess_text(row, rtype)
    if text and len(text) > 120:
        text = text[:120] + "…"
    return {
        "type": rtype,
        "time": time,
        "url": url,
        "text": text,
    }

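# "utf-8-sig" strips the UTF-8 BOM if the exported CSV starts with one.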
def read_csv(path: Path, rtype: str):
    entries = []
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            entries.append(summarize_row(row, rtype))
    return entries

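# Collect summaries per interaction type from the export directory; CSV files
# listed in CSV_FILES but absent from the export are simply skipped.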
def scan_export_dir(root: Path):
    results = {"comments": [], "reactions": [], "shares": []}
    for rtype, filenames in CSV_FILES.items():
        for name in filenames:
            path = root / name
            if not path.is_file():
                continue
            entries = read_csv(path, rtype)
            results[rtype].extend(entries)
    return results


# ---------- date / filtering helpers ----------

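# LinkedIn timestamps are expected as "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD";
# anything else yields None.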
def parse_linkedin_date(s):
    if not s:
        return None
    for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            pass
    return None

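# Keep only entries strictly older than the cutoff; entries whose date cannot
# be parsed are dropped from the result.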
def filter_older_than(entries, days):
    cutoff = datetime.now() - timedelta(days=days)
    filtered = []
    for e in entries:
        d = parse_linkedin_date(e["time"])
        if d and d < cutoff:
            filtered.append(e)
    return filtered


# ---------- SQLite store for actions (deleted / ignored / keep) ----------

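# Single table keyed by URL: any URL recorded here is never proposed again,
# whatever the note ('keep', 'ignored' or 'deleted').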
def init_db(db_path: Path):
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(db_path))
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS ignored (
            url TEXT PRIMARY KEY,
            type TEXT,
            added_at TEXT,
            note TEXT
        )
        """
    )
    conn.commit()
    return conn

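# Return the set of URLs already triaged in previous runs.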
def load_ignored_urls(conn):
    cur = conn.cursor()
    cur.execute("SELECT url FROM ignored")
    rows = cur.fetchall()
    return {r[0] for r in rows}

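# Record a triage decision; INSERT OR REPLACE lets a later decision overwrite
# an earlier one for the same URL. Entries without a URL are skipped.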
def add_ignored(conn, entry, note):
    if not entry.get("url"):
        return
    conn.execute(
        "INSERT OR REPLACE INTO ignored(url, type, added_at, note) VALUES (?, ?, ?, ?)",
        (
            entry["url"],
            entry.get("type") or "",
            datetime.now().isoformat(timespec="seconds"),
            note,
        ),
    )
    conn.commit()


# ---------- main ----------

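# Entry point. Dry-run by default: the script never modifies anything on
# LinkedIn itself; in 'safari-comments' mode it only opens URLs in Safari and
# the actual deletion is done manually in the browser.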
def main():
    parser = argparse.ArgumentParser(
        description=(
            "Dry-run and semi-automatic cleanup of your LinkedIn public "
            "interactions based on the CSV export."
        )
    )
    parser.add_argument(
        "export_dir",
        type=Path,
        help="Directory containing your LinkedIn export (Complete_LinkedInDataExport_*/).",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Max number of examples to display per type (0 = all, default: 10).",
    )
    parser.add_argument(
        "--older-than",
        type=int,
        default=None,
        help="Only keep interactions older than N days.",
    )
    parser.add_argument(
        "--export-urls",
        type=Path,
        default=None,
        help="If set, write filtered interactions to this file (type;date;url).",
    )
    parser.add_argument(
        "--run-mode",
        choices=["safari-comments"],
        default=None,
        help="Semi-auto mode: 'safari-comments' opens comment URLs in Safari.",
    )
    parser.add_argument(
        "--db-path",
        type=Path,
        default=Path("~/.linkedin_cleanup.sqlite"),
        help="Path to the SQLite database (default: ~/.linkedin_cleanup.sqlite).",
    )
    args = parser.parse_args()

    if not args.export_dir.is_dir():
        print(f"{args.export_dir} is not a directory.")
        return

    # DB
    db_path = args.db_path.expanduser()
    conn = init_db(db_path)
    ignored_urls = load_ignored_urls(conn)

    # Read export
    results = scan_export_dir(args.export_dir)

    # Age filter
    if args.older_than:
        for rtype in results:
            results[rtype] = filter_older_than(results[rtype], args.older_than)

    # Filter against DB (anything already recorded, regardless of note)
    for rtype in results:
        results[rtype] = [
            e for e in results[rtype]
            if not (e.get("url") and e["url"] in ignored_urls)
        ]

    # Optional URL export
    if args.export_urls:
        with args.export_urls.open("w", encoding="utf-8") as f:
            for rtype in ["comments", "reactions", "shares"]:
                for e in results[rtype]:
                    if not e["url"]:
                        continue
                    date = e["time"] or ""
                    f.write(f"{rtype};{date};{e['url']}\n")
| print(f"Liste des URL exportée dans : {args.export_urls}") | |
    # ---------- run-mode (Safari + DB: deleted/keep/ignored) ----------
    if args.run_mode == "safari-comments":
        # Must be running on macOS
        if sys.platform != "darwin":
            print("Error: 'safari-comments' run mode is only supported on macOS.")
            print(f"Current platform detected: {sys.platform}")
            return
        comments = [e for e in results["comments"] if e.get("url")]
        if args.limit and args.limit > 0:
            comments = comments[: args.limit]
        total = len(comments)
        if not total:
            print("No comments to process in this run mode.")
            return
        print(f"Run-mode safari-comments: {total} potential comment(s).")
        print("For each comment:")
        print(" y = open in Safari to potentially delete it")
        print(" N = keep it and whitelist it (keep, never proposed again)")
        print(" i = ignore permanently (stored as 'ignored', never proposed again)")
        print(" q = quit this run mode\n")
        for idx, e in enumerate(comments, 1):
            print(f"[{idx}/{total}]")
            if e["time"]:
                print(f" Date : {e['time']}")
            print(f" URL : {e['url']}")
            if e["text"]:
                print(f" Text : {e['text']}")
            while True:
                ans = input("Open in Safari? [y/N/i/q] ").strip().lower()
                if ans in ("", "n"):
                    add_ignored(conn, e, note="keep")
                    print(" -> kept and stored as 'keep' (whitelist, not proposed again).\n")
                    break
                elif ans == "i":
                    add_ignored(conn, e, note="ignored")
                    print(" -> stored as 'ignored' (not proposed again).\n")
                    break
                elif ans == "q":
                    print("End of run-mode.")
                    return
                elif ans == "y":
                    print(" -> opening in Safari…")
                    subprocess.run(["open", "-a", "Safari", e["url"]])
                    input("Delete (or not) the comment, then press Enter to continue... ")
                    while True:
                        ans2 = input(
                            "Was the comment deleted? [y/N=keep and never propose again] "
                        ).strip().lower()
                        if ans2 in ("", "n"):
                            add_ignored(conn, e, note="keep")
                            print(" -> kept and stored as 'keep' (whitelist, not proposed again).\n")
                            break
                        elif ans2 == "y":
                            add_ignored(conn, e, note="deleted")
                            print(" -> stored as 'deleted' in DB (not proposed again).\n")
                            break
                        else:
| print("Invalid answer (use 'y' or Enter).") | |
                    break
                else:
                    print("Invalid choice.")
        print("\nRun-mode safari-comments finished.")
        return

    # ---------- standard dry-run output ----------
    total = sum(len(v) for v in results.values())
    print("\n=== DRY-RUN SUMMARY ===")
    print(f" Comments : {len(results['comments'])}")
    print(f" Reactions : {len(results['reactions'])}")
    print(f" Shares : {len(results['shares'])}")
    print(f" TOTAL : {total}\n")
    for rtype, label in [
        ("comments", "COMMENTS"),
        ("reactions", "REACTIONS / LIKES"),
        ("shares", "SHARES / REPOSTS"),
    ]:
        entries = results[rtype]
        if not entries:
            continue
        if args.limit and args.limit > 0:
            to_show = entries[: args.limit]
            header_suffix = f" (examples, max {args.limit})"
        else:
            to_show = entries
            header_suffix = " (all examples)"
        print(f"--- {label}{header_suffix} ---")
        for i, e in enumerate(to_show, 1):
            print(f"[{i}]")
            if e["time"]:
                print(f" Date : {e['time']}")
            if e["url"]:
                print(f" URL : {e['url']}")
            if e["text"]:
                print(f" Text : {e['text']}")
            print()
    print("Dry run finished. Nothing was modified on LinkedIn.")


if __name__ == "__main__":
    main()