Skip to content

Instantly share code, notes, and snippets.

@VoidAny
Last active August 10, 2025 20:12
Show Gist options
  • Select an option

  • Save VoidAny/431c652d2e8d6fc58c740b75585b596e to your computer and use it in GitHub Desktop.

Select an option

Save VoidAny/431c652d2e8d6fc58c740b75585b596e to your computer and use it in GitHub Desktop.
Does your favorite web novel use dashes to start dialogue instead of enclosing it in quotes? This program will fix it! Created to process this book/series on AO3: https://archiveofourown.org/works/51538063
from bs4 import BeautifulSoup
from pathlib import Path
# Dash characters that may open a line of dialogue: em dash, en dash, hyphen-minus.
DASHES = ("—", "–", "-")

# (before, after) whitespace pairs that may surround an in-paragraph dash.
# U+00A0 is a no-break space, U+202F a narrow no-break space and U+2007 a
# figure space; each is paired with itself and with a plain space.
_SPACE_PAIRS = (
    (" ", " "),
    (" ", "\u00A0"),
    ("\u00A0", " "),
    ("\u00A0", "\u00A0"),
    ("\u202F", " "),
    (" ", "\u202F"),
    ("\u202F", "\u202F"),
    ("\u2007", " "),
    (" ", "\u2007"),
    ("\u2007", "\u2007"),
)

# Every dash wrapped in every whitespace pair, grouped by dash character.
DASHES_SPACED = [
    f"{before}{dash}{after}"
    for dash in DASHES
    for before, after in _SPACE_PAIRS
]
def _quote_dialogue(text: str) -> str:
    """Return dash-led dialogue *text* rewritten with double quotes.

    *text* must already be stripped and start with one of ``DASHES``. The
    leading dash becomes an opening quote and every space-coated dash inside
    the text (any entry of ``DASHES_SPACED``) becomes a quote as well.
    """
    # The leading dash is not space-coated, so it is counted up front.
    dash_count = 1 + sum(text.count(spaced) for spaced in DASHES_SPACED)

    quoted = '"' + text[1:].strip()
    # An odd number of dashes leaves the last quote open, so close it.
    # An even number means the final dash already closed the dialogue.
    if dash_count % 2:
        quoted += '"'

    # In-paragraph substitution. This works because in-paragraph dashes are
    # space-coated.
    # NOTE(review): the surrounding spaces are consumed by the replacement,
    # so `a — b` becomes `a"b`; confirm this is the desired output.
    for spaced in DASHES_SPACED:
        quoted = quoted.replace(spaced, '"')
    return quoted


def main(book: Path, out: Path) -> None:
    """Convert dash-introduced dialogue in an HTML book to quoted dialogue.

    Reads *book*, rewrites every ``<p>`` whose text starts with a dash, and
    writes the resulting document to *out*.

    Args:
        book: Path of the input HTML file.
        out: Path the converted HTML is written to.
    """
    # Read as UTF-8 explicitly: the dashes and spaces being matched are
    # non-ASCII, so the platform default encoding must not be relied upon.
    with book.open('r', encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    for p in soup.find_all("p"):
        paragraph_text = p.text.strip()
        if not paragraph_text:
            continue

        # Preserve <em>: when the dialogue itself lives inside the paragraph's
        # first <em>, rewrite only that element so the emphasis markup stays.
        em_text = p.em.text.strip() if p.em else ""
        if em_text.startswith(DASHES):
            p.em.string = _quote_dialogue(em_text)
            continue

        # No <em> carrying the dialogue: rewrite the paragraph as a whole.
        # Assigning to .string replaces the paragraph's children with text.
        if paragraph_text.startswith(DASHES):
            p.string = _quote_dialogue(paragraph_text)

    with out.open('w', encoding="utf-8") as f:
        f.write(str(soup))
if __name__ == "__main__":
    # Command-line entry point: parse the two positional paths and run main().
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            "Does your favorite web novel use dashes to start dialogue "
            "instead of bounding it with quotes? This program will fix it!"
        )
    )
    # Let argparse build the Path objects directly.
    parser.add_argument("book", type=Path, help="Path to input html book")
    parser.add_argument("out", type=Path, help="Output location the program will write to")
    args = parser.parse_args()
    main(args.book, args.out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment