CLI tool designed to parse, filter, and clean up your public interactions from the CSV export provided by LinkedIn
#!/usr/bin/env python3
# linkedin_export_parser_sweeper.py
# Version 0.2 – 2025-12-02
#
# CLI tool designed to parse, filter, and clean up your public LinkedIn
# interactions (comments, reactions, shares) based on the official CSV data
# export. Provides interactive triage, persistent SQLite-based whitelist/
# blacklist, and semi-automated comment removal through Safari integration.
#
# (c) 2025 dzogrim
# License: MIT
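#
# Typical invocations (the export directory path is illustrative; point the
# script at the unzipped folder from LinkedIn's data export):
#
#   # Dry run: summarize comments / reactions / shares (default 10 examples each)
#   python3 linkedin_export_parser_sweeper.py ~/Downloads/Complete_LinkedInDataExport_2025-12-01
#
#   # Interactions older than one year, URLs dumped to a file (type;date;url)
#   python3 linkedin_export_parser_sweeper.py ~/Downloads/Complete_LinkedInDataExport_2025-12-01 \
#       --older-than 365 --export-urls old_interactions.txt
#
#   # Semi-automatic comment triage via Safari (macOS only); decisions persist
#   # in the SQLite database (--db-path, default ~/.linkedin_cleanup.sqlite)
#   python3 linkedin_export_parser_sweeper.py ~/Downloads/Complete_LinkedInDataExport_2025-12-01 \
#       --run-mode safari-comments --limit 0
#
# Deleting the SQLite file resets all stored keep/ignored/deleted decisions;
# init_db() simply recreates it on the next run.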
import argparse
import csv
import sqlite3
import subprocess
import sys
from pathlib import Path
from datetime import datetime, timedelta
# May want to add Votes.csv and Rich_Media.csv
CSV_FILES = {
"comments": ["Comments.csv"],
"reactions": ["Reactions.csv"],
"shares": ["Shares.csv", "InstantReposts.csv"],
}
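# scan_export_dir() below only looks for the filenames above at the top level
# of the export directory, i.e. a layout roughly like this (hypothetical;
# not every file is present in every export):
#
#   Complete_LinkedInDataExport_2025-12-01/
#   ├── Comments.csv
#   ├── Reactions.csv
#   ├── Shares.csv
#   └── InstantReposts.csv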
# ---------- CSV helpers ----------
def pick_first(row, keys):
for k in keys:
if k in row and row[k]:
return row[k]
return None
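# Column names vary between LinkedIn export versions, so guess_url() and
# guess_text() first try a list of known headers, then fall back to a fuzzy
# match on the column name. A hypothetical Comments.csv row they can handle:
#   {"Date": "2024-05-12 09:30:12",
#    "Link": "https://www.linkedin.com/feed/update/urn:li:activity:1234567890/",
#    "Message": "Great post, thanks for sharing!"}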
def guess_url(row, rtype):
url = pick_first(
row,
[
"Permalink",
"URL",
"Url",
"Link",
"Share URL",
"Share Url",
"shareUrl",
"Original URL",
"Original Url",
],
)
if url:
return url
for k, v in row.items():
if v and "url" in k.lower():
return v
return None
def guess_text(row, rtype):
text = pick_first(
row,
[
"Comment",
"Comment Text",
"CommentBody",
"Message",
"Text",
"Share Text",
"Body",
"Title",
"Headline",
"Commentary",
"Share Commentary",
],
)
if text:
return text
for k, v in row.items():
kl = k.lower()
if v and any(
token in kl
for token in ["text", "title", "headline", "comment", "summary", "commentary"]
):
return v
return None
def summarize_row(row, rtype):
time = pick_first(
row,
["Created At", "CreatedAt", "createdAt", "Date", "Timestamp"],
)
url = guess_url(row, rtype)
text = guess_text(row, rtype)
if text and len(text) > 120:
text = text[:120] + "…"
return {
"type": rtype,
"time": time,
"url": url,
"text": text,
}
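# Every CSV row, whatever its original columns, is reduced to the same small
# dict (values illustrative):
#   {"type": "comments", "time": "2024-05-12 09:30:12",
#    "url": "https://www.linkedin.com/feed/update/urn:li:activity:1234567890/",
#    "text": "Great post, thanks for sharing!"}
# Columns that cannot be identified yield None; long text is cut at 120 chars.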
def read_csv(path: Path, rtype: str):
entries = []
with path.open("r", encoding="utf-8-sig", newline="") as f:
reader = csv.DictReader(f)
for row in reader:
entries.append(summarize_row(row, rtype))
return entries
def scan_export_dir(root: Path):
results = {"comments": [], "reactions": [], "shares": []}
for rtype, filenames in CSV_FILES.items():
for name in filenames:
path = root / name
if not path.is_file():
continue
entries = read_csv(path, rtype)
results[rtype].extend(entries)
return results
# ---------- date / filtering helpers ----------
def parse_linkedin_date(s):
if not s:
return None
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
try:
return datetime.strptime(s, fmt)
except ValueError:
pass
return None
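# Examples of the two accepted formats:
#   parse_linkedin_date("2024-05-12 09:30:12") -> datetime(2024, 5, 12, 9, 30, 12)
#   parse_linkedin_date("2024-05-12")          -> datetime(2024, 5, 12, 0, 0)
# Anything else returns None, and filter_older_than() then skips that entry
# (an interaction without a parseable date is never selected for cleanup).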
def filter_older_than(entries, days):
cutoff = datetime.now() - timedelta(days=days)
filtered = []
for e in entries:
d = parse_linkedin_date(e["time"])
if d and d < cutoff:
filtered.append(e)
return filtered
# ---------- SQLite store for actions (deleted / ignored / keep) ----------
def init_db(db_path: Path):
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(db_path))
conn.execute(
"""
CREATE TABLE IF NOT EXISTS ignored (
url TEXT PRIMARY KEY,
type TEXT,
added_at TEXT,
note TEXT
)
"""
)
conn.commit()
return conn
def load_ignored_urls(conn):
cur = conn.cursor()
cur.execute("SELECT url FROM ignored")
rows = cur.fetchall()
return {r[0] for r in rows}
def add_ignored(conn, entry, note):
if not entry.get("url"):
return
conn.execute(
"INSERT OR REPLACE INTO ignored(url, type, added_at, note) VALUES (?, ?, ?, ?)",
(
entry["url"],
entry.get("type") or "",
datetime.now().isoformat(timespec="seconds"),
note,
),
)
conn.commit()
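# add_ignored() is the only write path to the database. The note column keeps
# the reason a URL was stored ('keep', 'ignored', or 'deleted'), but main()
# filters on mere presence in the table, so all three states mean the same
# thing operationally: the URL is never proposed again.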
# ---------- main ----------
def main():
parser = argparse.ArgumentParser(
description=(
"Dry-run and semi-automatic cleanup of your LinkedIn public "
"interactions based on the CSV export."
)
)
parser.add_argument(
"export_dir",
type=Path,
help="Directory containing your LinkedIn export (Complete_LinkedInDataExport_*/).",
)
parser.add_argument(
"--limit",
type=int,
default=10,
help="Max number of examples to display per type (0 = all, default: 10).",
)
parser.add_argument(
"--older-than",
type=int,
default=None,
help="Only keep interactions older than N days.",
)
parser.add_argument(
"--export-urls",
type=Path,
default=None,
help="If set, write filtered interactions to this file (type;date;url).",
)
parser.add_argument(
"--run-mode",
choices=["safari-comments"],
default=None,
help="Semi-auto mode: 'safari-comments' opens comment URLs in Safari.",
)
parser.add_argument(
"--db-path",
type=Path,
default=Path("~/.linkedin_cleanup.sqlite"),
help="Path to the SQLite database (default: ~/.linkedin_cleanup.sqlite).",
)
args = parser.parse_args()
if not args.export_dir.is_dir():
print(f"{args.export_dir} is not a directory.")
return
# DB
db_path = args.db_path.expanduser()
conn = init_db(db_path)
ignored_urls = load_ignored_urls(conn)
# Read export
results = scan_export_dir(args.export_dir)
# Age filter
if args.older_than:
for rtype in results:
results[rtype] = filter_older_than(results[rtype], args.older_than)
# Filter against DB (anything already recorded, regardless of note)
for rtype in results:
results[rtype] = [
e for e in results[rtype]
if not (e.get("url") and e["url"] in ignored_urls)
]
# Optional URL export
if args.export_urls:
with args.export_urls.open("w", encoding="utf-8") as f:
for rtype in ["comments", "reactions", "shares"]:
for e in results[rtype]:
if not e["url"]:
continue
date = e["time"] or ""
f.write(f"{rtype};{date};{e['url']}\n")
print(f"Liste des URL exportée dans : {args.export_urls}")
# ---------- run-mode (Safari + DB: deleted/keep/ignored) ----------
if args.run_mode == "safari-comments":
# Must be running on macOS
if sys.platform != "darwin":
print("Error: 'safari-comments' run mode is only supported on macOS.")
print(f"Current platform detected: {sys.platform}")
return
comments = [e for e in results["comments"] if e.get("url")]
if args.limit and args.limit > 0:
comments = comments[: args.limit]
total = len(comments)
if not total:
print("No comments to process in this run mode.")
return
print(f"Run-mode safari-comments: {total} potential comment(s).")
print("For each comment:")
print(" y = open in Safari to potentially delete it")
print(" N = keep it and whitelist it (keep, never proposed again)")
print(" i = ignore permanently (stored as 'ignored', never proposed again)")
print(" q = quit this run mode\n")
for idx, e in enumerate(comments, 1):
print(f"[{idx}/{total}]")
if e["time"]:
print(f" Date : {e['time']}")
print(f" URL : {e['url']}")
if e["text"]:
print(f" Text : {e['text']}")
while True:
ans = input("Open in Safari? [y/N/i/q] ").strip().lower()
if ans in ("", "n"):
add_ignored(conn, e, note="keep")
print(" -> kept and stored as 'keep' (whitelist, not proposed again).\n")
break
elif ans == "i":
add_ignored(conn, e, note="ignored")
print(" -> stored as 'ignored' (not proposed again).\n")
break
elif ans == "q":
print("End of run-mode.")
return
elif ans == "y":
print(" -> opening in Safari…")
subprocess.run(["open", "-a", "Safari", e["url"]])
input("Delete (or not) the comment, then press Enter to continue... ")
while True:
ans2 = input(
"Was the comment deleted? [y/N=keep and never propose again] "
).strip().lower()
if ans2 in ("", "n"):
add_ignored(conn, e, note="keep")
print(" -> kept and stored as 'keep' (whitelist, not proposed again).\n")
break
elif ans2 == "y":
add_ignored(conn, e, note="deleted")
print(" -> stored as 'deleted' in DB (not proposed again).\n")
break
else:
print("Invalid answer (use 'y' or Enter).")
break
else:
print("Invalid choice.")
print("\nRun-mode safari-comments finished.")
return
# ---------- standard dry-run output ----------
total = sum(len(v) for v in results.values())
print("\n=== DRY-RUN SUMMARY ===")
print(f" Comments : {len(results['comments'])}")
print(f" Reactions : {len(results['reactions'])}")
print(f" Shares : {len(results['shares'])}")
print(f" TOTAL : {total}\n")
for rtype, label in [
("comments", "COMMENTS"),
("reactions", "REACTIONS / LIKES"),
("shares", "SHARES / REPOSTS"),
]:
entries = results[rtype]
if not entries:
continue
if args.limit and args.limit > 0:
to_show = entries[: args.limit]
header_suffix = f" (examples, max {args.limit})"
else:
to_show = entries
header_suffix = " (all examples)"
print(f"--- {label}{header_suffix} ---")
for i, e in enumerate(to_show, 1):
print(f"[{i}]")
if e["time"]:
print(f" Date : {e['time']}")
if e["url"]:
print(f" URL : {e['url']}")
if e["text"]:
print(f" Text : {e['text']}")
print()
print("Dry run finished. Nothing was modified on LinkedIn.")
if __name__ == "__main__":
main()