Last active
January 20, 2026 19:03
-
-
Save petergi/b6a5785d8c113ca3d0ad12c0605c559a to your computer and use it in GitHub Desktop.
Total Library Portability: We now have a searchable terminal tool, readable Markdown reports, and a raw data CSV file. Best Practice Tip for CSVs: Once you open library_data.csv in Excel: Select all data (Ctrl+A). Insert > Table. Use the Filter arrows on the "Reason" or "Ext" columns to quickly see only your largest files or only your .mobi books.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
EBOOK AUDIT CENTER (V9 - CSV EXPORT & PORTABILITY)
===================================================
OVERVIEW:
A professional-grade ebook auditor and cataloger.
This version adds the ability to export your data for use
in spreadsheet software like Excel or Google Sheets.
FEATURES:
- [A]UDIT: Comprehensive library scan (Integrity + Size + Metadata).
- [S]EARCH: Real-time search by Title or Author.
- s[T]ATS: Visual breakdown of your format distribution.
- [E]XPORT: Saves your library data to a .csv file.
- [H]ELP: On-demand documentation.
AUTHOR: Peter G
DATE: 2026
"""
| import os | |
| import shutil | |
| import glob | |
| import datetime | |
| import zipfile | |
| import xml.etree.ElementTree as ET | |
| import csv # New requirement for spreadsheet export | |
# --- GLOBAL STORE ---
# Module-level state shared between the menu commands. audit_library()
# rebinds all three on every run; search, stats and export only read them.
LAST_FLAGGED = []  # entries whose "reason" is anything other than "Healthy"
LAST_HEALTHY = []  # entries that passed every check
FORMAT_STATS = {}  # extension (e.g. ".epub") -> number of files seen
def show_help():
    """Display the built-in usage guide for the menu commands.

    The guide is a fixed block of text written to standard output; nothing
    is returned.
    """
    guide = """
--- π‘ EBOOK COMMAND CENTER HELP GUIDE ---
1. AUDIT [A]: The core engine. It checks if EPUBs are valid ZIPs and
flags any file that is suspiciously small (<50KB) or large (>100MB).
2. EXPORT [E]: Saves EVERY book found into 'library_data.csv'.
This includes Title, Author, Size, Extension, and Status (Healthy/Flagged).
Use this to manage your library in Excel.
3. STATS [T]: Provides a high-level view of your collection's file types.
4. SAFETY: Always backup your files before using 'Live Action' mode.
------------------------------------------
"""
    print(guide)
def get_epub_metadata(filepath):
    """Extract the title and author of an EPUB from its OPF package file.

    The EPUB is opened as a ZIP archive, ``META-INF/container.xml`` is read
    to find the package (OPF) document, and the first ``dc:title`` and
    ``dc:creator`` elements of that document are used.

    Args:
        filepath: Path to the ``.epub`` file.

    Returns:
        A dict with string values under the keys ``"title"`` and
        ``"author"``. A missing or blank element leaves the corresponding
        ``"Unknown Title"`` / ``"Unknown Author"`` default in place. If the
        archive cannot be read or parsed at all, the file's base name is
        returned as the title and ``"N/A"`` as the author.
    """
    metadata = {"title": "Unknown Title", "author": "Unknown Author"}
    dc_ns = {'dc': 'http://purl.org/dc/elements/1.1/'}
    rootfile_tag = './/{urn:oasis:names:tc:opendocument:xmlns:container}rootfile'
    try:
        with zipfile.ZipFile(filepath, 'r') as z:
            with z.open('META-INF/container.xml') as f:
                container = ET.parse(f).getroot()
                # A container without a <rootfile> makes find() return None;
                # the resulting AttributeError is handled by the fallback below.
                rootfile_path = container.find(rootfile_tag).attrib['full-path']
            with z.open(rootfile_path) as f:
                package = ET.parse(f).getroot()
        for key, tag in (("title", './/dc:title'), ("author", './/dc:creator')):
            elem = package.find(tag, dc_ns)
            # Element.text is None for an empty element such as <dc:title/>.
            # Storing None would break callers that treat the value as a
            # string, so blank values keep the default instead.
            text = (elem.text or "").strip() if elem is not None else ""
            if text:
                metadata[key] = text
    except Exception:
        # Deliberately broad: metadata extraction is best-effort and a
        # damaged archive must never abort the surrounding library scan.
        return {"title": os.path.basename(filepath), "author": "N/A"}
    return metadata
def _unique_destination(folder, filename):
    """Return a path for *filename* inside *folder* that does not exist yet."""
    stem, suffix = os.path.splitext(filename)
    candidate = os.path.join(folder, filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f"{stem}_{counter}{suffix}")
        counter += 1
    return candidate


def audit_library(root_dir, corrupted_folder="CORRUPTED", dry_run=True):
    """Scan a directory tree for ebooks and classify each as healthy or flagged.

    Files ending in ``.epub``, ``.mobi``, ``.pdf`` or ``.azw3`` are found
    recursively under *root_dir*. A file is flagged when it is an EPUB that
    is not a valid ZIP archive, when it is smaller than 50 KB, or when it is
    larger than 100 MB. The module-level ``LAST_FLAGGED``, ``LAST_HEALTHY``
    and ``FORMAT_STATS`` stores are reset and repopulated on every call.

    Args:
        root_dir: Directory to scan.
        corrupted_folder: Where flagged files are moved when *dry_run* is
            false. Created if it does not exist.
        dry_run: When true (the default) nothing on disk is changed.

    Returns:
        A ``(flagged, healthy)`` tuple of lists. Each entry is a dict with
        the keys ``title``, ``author``, ``size``, ``reason``, ``ext`` and
        ``path``. For a file that was moved, ``path`` is its new location.

    Errors raised while processing a single file are printed and that file
    is left out of both lists; the scan continues with the next file.
    """
    global LAST_FLAGGED, LAST_HEALTHY, FORMAT_STATS
    ebook_extensions = ['.epub', '.mobi', '.pdf', '.azw3']
    min_size_kb, max_size_mb = 50, 100
    LAST_FLAGGED, LAST_HEALTHY = [], []
    FORMAT_STATS = {ext: 0 for ext in ebook_extensions}
    if not dry_run and not os.path.exists(corrupted_folder):
        os.makedirs(corrupted_folder)
    quarantine_dir = os.path.abspath(corrupted_folder)
    print(f"\n[SCANNING] Analyzing: {root_dir}...")
    for ext in ebook_extensions:
        is_epub = ext == '.epub'
        search_pattern = os.path.join(root_dir, '**/*' + ext)
        for filepath in glob.glob(search_pattern, recursive=True):
            # The pattern also matches directories whose name ends in the
            # extension; those are not books and must never be moved.
            if os.path.isdir(filepath):
                continue
            try:
                FORMAT_STATS[ext] += 1
                size_kb = os.path.getsize(filepath) / 1024
                if is_epub:
                    meta = get_epub_metadata(filepath)
                else:
                    meta = {"title": os.path.basename(filepath), "author": "N/A"}
                # A structural failure takes priority over the size checks.
                reason = None
                if is_epub and not zipfile.is_zipfile(filepath):
                    reason = "Broken File Structure"
                elif size_kb < min_size_kb:
                    reason = "Too Small (Potential Corrupt)"
                elif size_kb > (max_size_mb * 1024):
                    reason = "Too Large (Anomalous)"
                entry = {
                    "title": meta['title'], "author": meta['author'],
                    "size": f"{size_kb:.2f} KB" if size_kb < 1024 else f"{size_kb/1024:.2f} MB",
                    "reason": reason if reason else "Healthy",
                    "ext": ext, "path": filepath
                }
                if not reason:
                    LAST_HEALTHY.append(entry)
                    continue
                if not dry_run:
                    # A file already sitting in the quarantine folder (left
                    # there by an earlier run) stays where it is.
                    in_quarantine = os.path.dirname(os.path.abspath(filepath)) == quarantine_dir
                    if not in_quarantine:
                        # Two flagged files from different subfolders can
                        # share a base name; a plain move would overwrite
                        # the first one, so pick an unused name instead.
                        dest = _unique_destination(corrupted_folder, os.path.basename(filepath))
                        shutil.move(filepath, dest)
                        entry["path"] = dest
                LAST_FLAGGED.append(entry)
            except Exception as e:
                print(f"[ERROR] {filepath}: {e}")
    return LAST_FLAGGED, LAST_HEALTHY
def export_to_csv(filename="library_data.csv"):
    """Write every audited book to a single CSV file.

    Rows come from the module-level results of the last audit: healthy
    entries first, then flagged ones. The columns are title, author, size,
    ext, reason and path. If no audit has produced any entries, a hint is
    printed and no file is written.

    Args:
        filename: Destination path of the CSV file; an existing file is
            overwritten.

    Any error while writing is reported on standard output rather than
    raised.
    """
    if not (LAST_FLAGGED or LAST_HEALTHY):
        print("\n[!] No data to export. Please run an [A]udit first.")
        return
    rows = LAST_HEALTHY + LAST_FLAGGED
    columns = ["title", "author", "size", "ext", "reason", "path"]
    try:
        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            for row in rows:
                writer.writerow(row)
        print(f"\n[SUCCESS] Exported {len(rows)} records to '{filename}'")
    except Exception as err:
        print(f"[ERROR] Failed to export CSV: {err}")
def show_stats():
    """Print the number and percentage of files found per ebook format.

    Reads the module-level ``FORMAT_STATS`` mapping filled in by the last
    audit. When the counts add up to zero, a hint to run an audit is
    printed instead of the table.
    """
    total = sum(FORMAT_STATS.values())
    if not total:
        print("\n[!] No data. Run an [A]udit first.")
        return
    print(f"\n--- π FORMAT BREAKDOWN (Total: {total}) ---")
    for ext in FORMAT_STATS:
        count = FORMAT_STATS[ext]
        pct = count / total * 100
        print(f"{ext.upper():<8} | {count:<5} | {pct:.1f}%")
def search_library():
    """Prompt for a query and list healthy books whose title or author matches.

    Matching is a case-insensitive substring test against the entries in
    the module-level ``LAST_HEALTHY`` list. When that list is empty, a hint
    to run an audit is printed and no prompt is shown.
    """
    if not LAST_HEALTHY:
        print("\n[!] Catalog is empty. Run an [A]udit first.")
        return
    query = input("\nSearch Title/Author: ").strip().lower()
    for book in LAST_HEALTHY:
        # A title or author can be None when the EPUB carries an empty
        # metadata element; treat that as an empty string so one such book
        # does not abort the whole search with an AttributeError.
        title = (book['title'] or "").lower()
        author = (book['author'] or "").lower()
        if query in title or query in author:
            print(f"π {book['title']} | π€ {book['author']} | π {book['size']}")
def main():
    """Run the interactive menu loop until the user chooses (Q)uit.

    Each pass prints the menu, reads one option and dispatches to the
    matching command. Unrecognised options simply show the menu again.
    """
    # Commands that need no extra prompting map straight to their function.
    simple_commands = {
        's': search_library,
        't': show_stats,
        'e': export_to_csv,
        'h': show_help,
    }
    while True:
        print("\n[MENU] (A)udit | (S)earch | s(T)ats | (E)xport | (H)elp | (Q)uit")
        choice = input("Option: ").strip().lower()
        if choice == 'q':
            break
        if choice == 'a':
            path = input("Enter path: ").strip()
            if not os.path.isdir(path):
                print("Path not found.")
                continue
            # Only an explicit "n" switches to live mode; anything else
            # keeps the non-destructive dry run.
            is_dry = input("Dry Run? (Y/n): ").lower() != 'n'
            flagged, healthy = audit_library(path, dry_run=is_dry)
            # Report what the audit actually produced; this script does not
            # write any report files.
            print(f"Audit Complete! {len(healthy)} healthy, {len(flagged)} flagged.")
        elif choice in simple_commands:
            simple_commands[choice]()


if __name__ == "__main__":
    try:
        main()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl+C at a prompt: leave quietly, no traceback.
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment