Skip to content

Instantly share code, notes, and snippets.

@ppo
Created January 21, 2026 17:16
Show Gist options
  • Select an option

  • Save ppo/0aa118cfee8c831e2b0ae6677e32465d to your computer and use it in GitHub Desktop.

Select an option

Save ppo/0aa118cfee8c831e2b0ae6677e32465d to your computer and use it in GitHub Desktop.
Script to clean Notion export by standardizing folder/file names.
#!/usr/bin/env python
"""
Clean Notion export by standardizing folder/file names.
- Keep only `_all.csv` files (remove duplicates)
- Rename folders to match their corresponding `.{md,csv}` file UUID suffixes
"""
import argparse
import os
import re
from glob import glob
from pathlib import Path
# Regex fragments describing the names found in a Notion export.
# A space followed by 32 lowercase hex digits, e.g. "Page 0123...cdef".
UUID_SUFFIX_RE = r" [0-9a-f]{32}"
# File extensions (without the dot) that may carry a UUID suffix.
FILE_EXT_RE = r"(csv|md)"

# ANSI escape sequences used to style terminal output.
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
DIM_GRAY = "\033[90m"
WHITE = "\033[97m"

# Headings are rendered bold and yellow.
HEADING = BOLD + YELLOW
def h1(text):
    """Print *text* as a top-level heading with a blank line before and after."""
    banner = f"\n{HEADING}=== {text} ==={RESET}\n"
    print(banner)
def h2(text):
    """Print *text* as a second-level heading in the heading style."""
    line = f"{HEADING}{text}{RESET}"
    print(line)
def highlight(text):
    """Print *text* in bright white to make it stand out."""
    line = f"{WHITE}{text}{RESET}"
    print(line)
def get_folders_by_depth_desc(base_path="."):
    """Return every folder below *base_path*, deepest folders first.

    The returned ``Path`` objects are relative to *base_path*. Depth is the
    number of path components; folders at the same depth keep the order in
    which ``os.walk`` produced them.
    """
    root_dir = Path(base_path)
    relative_dirs = [
        (Path(current) / name).relative_to(root_dir)
        for current, subdirs, _ in os.walk(root_dir)
        for name in subdirs
    ]
    # sorted() is stable, so equal-depth folders stay in walk order.
    return sorted(relative_dirs, key=lambda rel: len(rel.parts), reverse=True)
def _strip_all_suffix(csv_path):
    """Return the path of ``<name>.csv`` for a given ``<name>_all.csv`` path."""
    # Only the trailing marker is removed; an "_all" elsewhere in the file
    # name (e.g. "call_allowed_all.csv") is part of the real name.
    return csv_path.with_name(csv_path.name.removesuffix("_all.csv") + ".csv")


def process_csv():
    """Keep only the ``*_all.csv`` files under the current working directory.

    Works in two passes over ``**/*_all.csv``:

    1. For every ``<name>_all.csv``, delete the sibling ``<name>.csv`` if it
       exists as a file.
    2. Rename every ``<name>_all.csv`` to ``<name>.csv``.
    """
    h2("Process CSV: Keep Only '*_all.csv'")
    for csv_path_str in glob("**/*_all.csv", recursive=True):
        csv_path = Path(csv_path_str)
        highlight(csv_path)
        other_csv_path = _strip_all_suffix(csv_path)
        if other_csv_path.is_file():
            print(f" Deleting {other_csv_path}")
            other_csv_path.unlink()

    h2("Process CSV: Rename '*_all.csv'")
    for csv_path_str in glob("**/*_all.csv", recursive=True):
        csv_path = Path(csv_path_str)
        print(csv_path)
        # Same target as the file deleted in the first pass.
        csv_path.rename(_strip_all_suffix(csv_path))
def process_folders():
    """Append the matching UUID suffix to folders that lack one.

    Visits every folder under the current working directory, deepest first,
    so a folder is renamed only after everything inside it was handled.
    Folders whose path already ends in a UUID suffix are skipped. For any
    other folder, its parent directory is searched for an entry named
    ``<folder name> <uuid>.md`` or ``<folder name> <uuid>.csv``:

    - exactly one match: the folder is renamed to ``<folder name> <uuid>``;
    - no match, several matches, or an already existing target: the folder
      is left untouched and the problem is recorded.

    All recorded problems are printed as a summary at the end.
    """
    h2("Process Folders")
    errors = []
    has_uuid_re = re.compile(fr"{UUID_SUFFIX_RE}$")

    # Loop over folders (deepest first)
    for folder_path in get_folders_by_depth_desc():
        # Skip if already has UUID
        if has_uuid_re.search(str(folder_path)):
            print(f"{DIM_GRAY}Skip {folder_path}{RESET}")
            continue

        highlight(folder_path)

        # Compare sibling names against an escaped regex instead of globbing
        # on the folder name: glob would interpret "[", "]", "*" and "?" in a
        # folder name as wildcards and miss the matching file.
        sibling_re = re.compile(
            fr"{re.escape(folder_path.name)}({UUID_SUFFIX_RE})\.{FILE_EXT_RE}"
        )
        # (path, uuid suffix) pairs; sorted so the output is deterministic.
        matches = sorted(
            (str(sibling), found.group(1))
            for sibling in folder_path.parent.iterdir()
            if (found := sibling_re.fullmatch(sibling.name))
        )

        if not matches:
            print(f" 🔴 {RED}Not found{RESET}")
            errors.append(f"Not Found: {CYAN}{folder_path}{RED}")
        elif len(matches) == 1:
            file_path, suffix = matches[0]
            new_folder_path = Path(f"{folder_path}{suffix}")
            print(f" 🟢 Found: {Path(file_path).name}")
            if new_folder_path.exists():
                # Renaming onto an existing path would either clobber it or
                # raise, depending on the platform; report it instead.
                print(f" 🔴 {RED}Already exists: {new_folder_path}{RESET}")
                errors.append(f"Already Exists: {CYAN}{new_folder_path}{RED}")
            else:
                print(f" {GREEN}Rename: {new_folder_path}{RESET}")
                folder_path.rename(new_folder_path)
        else:
            for file_path, _ in matches:
                print(f" 🔴 {RED}{file_path}{RESET}")
            errors.append(f"Multiple: {CYAN}{folder_path}{RED}")

        # NOTE(review): assumed to be a per-folder separator line; the
        # original nesting of this print() could not be confirmed.
        print()

    if errors:
        print(f"{BOLD}{RED}ERRORS:{RESET}")
        for error in errors:
            print(f"{RED}{error}{RESET}")
def parse_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read them from ``sys.argv``.

    Returns:
        The parsed namespace. ``folders`` holds one resolved, absolute
        ``Path`` per folder given on the command line (at least one).
    """
    parser = argparse.ArgumentParser(description="Clean Notion export by standardizing folder/file names.")
    parser.add_argument("folders", nargs="+", help="Notion export folders to clean")
    args = parser.parse_args(argv)

    # Sanitizing: absolute paths stay valid after main() changes directory.
    args.folders = [Path(folder).resolve() for folder in args.folders]
    return args
def main():
    """Clean every folder named on the command line.

    Each folder becomes the current working directory in turn, because
    process_csv() and process_folders() search relative to it. Arguments
    that are not existing directories are reported and skipped.
    """
    args = parse_args()
    for folder in args.folders:
        h1(f"Cleaning {CYAN}{folder}{HEADING}")
        # is_dir() rather than exists(): os.chdir() raises on a regular file.
        if not folder.is_dir():
            print(f"{RED}ERROR: Folder '{CYAN}{folder}{RED}' not found{RESET}")
            continue
        os.chdir(folder)
        process_csv()
        process_folders()


# Run the cleaner only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment