Last active
August 10, 2025 20:12
-
-
Save VoidAny/431c652d2e8d6fc58c740b75585b596e to your computer and use it in GitHub Desktop.
Does your favorite web novel use dashes to start dialogue instead of bounding it with quotes? This program will fix it! Created to process this book/series on AO3: https://archiveofourown.org/works/51538063
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| from pathlib import Path | |
# Characters that may introduce a line of dialogue: em dash, en dash, hyphen.
DASHES = ("—", "–", "-")

# (leading, trailing) whitespace pairs that may surround an in-paragraph dash.
# Covers the regular space, no-break space (U+00A0), narrow no-break space
# (U+202F) and figure space (U+2007). Only the combinations listed here are
# recognised; the order is kept stable because consumers iterate the list.
_SPACE_PAIRS = (
    (" ", " "),
    (" ", "\u00A0"),
    ("\u00A0", " "),
    ("\u00A0", "\u00A0"),
    ("\u202F", " "),
    (" ", "\u202F"),
    ("\u202F", "\u202F"),
    ("\u2007", " "),
    (" ", "\u2007"),
    ("\u2007", "\u2007"),
)

# Every dash wrapped in each supported whitespace pair, grouped by dash.
DASHES_SPACED = []
for dash in DASHES:
    DASHES_SPACED.extend(f"{lead}{dash}{trail}" for lead, trail in _SPACE_PAIRS)
def _quote_dialogue(text: str, delimiter: "re.Pattern[str]") -> str:
    """Rewrite dash-introduced dialogue as quoted dialogue.

    Args:
        text: Stripped paragraph text whose first character is a dialogue dash.
        delimiter: Compiled pattern matching any space-surrounded dash.

    Returns:
        The text with the leading dash replaced by an opening quote and every
        in-paragraph spaced dash replaced by a closing or opening quote.
    """
    segments = delimiter.split(text[1:].strip())
    pieces = ['"', segments[0]]
    for index, segment in enumerate(segments[1:], start=1):
        # Spaced dashes alternate: the 1st, 3rd, ... close the spoken part and
        # the 2nd, 4th, ... open the next one. Keep one space on the narration
        # side so the words do not run into the quote mark.
        pieces.append('" ' if index % 2 else ' "')
        pieces.append(segment)
    # An odd number of segments means the paragraph ends inside speech, so the
    # final quote still has to be closed.
    if len(segments) % 2:
        pieces.append('"')
    return "".join(pieces)


def main(book: Path, out: Path) -> None:
    """Convert dash-introduced dialogue in an HTML book to quoted dialogue.

    Every ``<p>`` whose stripped text starts with one of ``DASHES`` is
    rewritten. Paragraphs that do not start with a dash are left untouched.

    Args:
        book: Path of the HTML file to read.
        out: Path the converted HTML is written to.
    """
    # Function-scope import: the file's top-level imports do not include ``re``.
    import re

    delimiter = re.compile("|".join(re.escape(d) for d in DASHES_SPACED))

    # Explicit UTF-8 so the dash and no-break-space characters survive
    # regardless of the platform's locale encoding.
    with book.open("r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    for p in soup.find_all("p"):
        text = p.text.strip()
        if not text or text[0] not in DASHES:
            continue

        # Preserve italics: when a single <em> holds the whole paragraph text,
        # rewrite inside it. Otherwise the paragraph's content is replaced by
        # one plain string (any inline markup in it is dropped).
        em = p.em
        target = em if em is not None and em.text.strip() == text else p
        target.string = _quote_dialogue(text, delimiter)

    with out.open("w", encoding="utf-8") as f:
        f.write(str(soup))
if __name__ == "__main__":
    # Command-line entry point: takes the input book and the output location.
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            "Does your favorite web novel use dashes to start dialogue instead of "
            "bounding it with quotes? This program will fix it!"
        )
    )
    # Parse straight into Path objects, which is what main() expects.
    parser.add_argument("book", type=Path, help="Path to input html book")
    parser.add_argument("out", type=Path, help="Output location the program will write to")
    args = parser.parse_args()
    main(args.book, args.out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment