Last active
September 24, 2025 18:50
-
-
Save 3nws/bc34165fa6157fb39fc8e9d09cf1003b to your computer and use it in GitHub Desktop.
Creates an EPUB of the most recent post (or optionally a specific story) on witchculttranslation.com and uploads it to Dropbox.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import html5lib # type: ignore | |
| import requests | |
| import os | |
| import dropbox | |
| from ebooklib import epub | |
| from bs4 import BeautifulSoup | |
| from config import app_folder, app_key, app_secret, oauth2_refresh_token | |
| # For the refresh token | |
| # Go to https://www.dropbox.com/oauth2/authorize?client_id=<APP_KEY>&token_access_type=offline&response_type=code and get the authorization code | |
| # Make a post request to https://api.dropboxapi.com/oauth2/token with: | |
| # Authorization (Basic Auth: Username = <APP_KEY>, Password = <APP_SECRET>), Body Type (x-www-form-urlencoded) | |
| # Body: code=<AUTHORIZATION_CODE>, grant_type=authorization_code | |
# One HTTP session reused for every request (site pages, images, ntfy post).
session = requests.Session()

# Dropbox client; given a refresh token the SDK renews the short-lived
# access token automatically.
dbx = dropbox.Dropbox(
    app_key=app_key, app_secret=app_secret, oauth2_refresh_token=oauth2_refresh_token
)

# If you want to process a specific url paste here
chapter_url_manual = ""

# Make sure the Dropbox app folder exists before any upload is attempted.
try:
    dbx.files_get_metadata(app_folder)
except dropbox.exceptions.ApiError as e:
    # A GetMetadataError here means the folder is missing, so create it;
    # any other API error propagates.
    if isinstance(e.error, dropbox.files.GetMetadataError):
        dbx.files_create_folder(app_folder)

site_url = "https://witchculttranslation.com"
def upload_to_dbox(epub_file_name):
    """Upload *epub_file_name* into the configured Dropbox app folder.

    Returns True on success and False when the upload itself is rejected
    (``UploadError``); any other Dropbox API error is re-raised.
    """
    try:
        # "with" guarantees the handle is closed even if files_upload raises.
        with open(epub_file_name, "rb") as f:
            dbx.files_upload(f.read(), f"{app_folder}/{epub_file_name}")
        return True
    except dropbox.exceptions.ApiError as e:
        if isinstance(e.error, dropbox.files.UploadError):
            return False
        # Bare raise preserves the original traceback.
        raise
def create_epub(title, url, content, images, cover_url=None):
    """Build an epub for one chapter, upload it to Dropbox and notify ntfy.

    title     -- chapter title; used for the book title and output file name
    url       -- chapter url; its last non-empty path segment becomes the id
    content   -- chapter body as a single html string
    images    -- local paths of downloaded images referenced by *content*;
                 each file is embedded in the book and then deleted
    cover_url -- optional url of a cover image to fetch and embed
    """
    book = epub.EpubBook()
    # requests.Response is falsy for non-2xx statuses, so a failed cover
    # fetch simply results in no cover being set below.
    cover = session.get(cover_url, stream=True) if cover_url else None
    chapter_slug = list(filter(None, url.split("/")))[-1]
    chapter_id = chapter_slug
    chapter_title = title
    book.set_identifier(chapter_id)
    book.set_title(chapter_title)
    book.set_language("en")
    if cover_url and cover:
        book.set_cover("image.jpg", (cover.content))

    spine = []
    chapter = epub.EpubHtml(
        title=chapter_title,
        file_name=f"{chapter_slug}.xhtml",
        lang="en",
        content=content,
    )
    book.add_item(chapter)
    spine.append(chapter)

    # Embed each downloaded image, then remove the temporary local file.
    for idx, image in enumerate(images):
        # "with" closes the handle (the original leaked it via open().read()).
        with open(image, "rb") as image_file:
            image_content = image_file.read()
        img = epub.EpubImage(
            uid=f"image_{idx}",
            file_name=image,
            content=image_content,
        )
        book.add_item(img)
        os.remove(image)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    style = "BODY {color: white;}"
    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=style,
    )
    book.add_item(nav_css)
    book.spine = ["nav", *spine]

    # Strip characters that are awkward in file names ('/' and ':').
    epub_file_name = f"Re: Zero - {chapter_title.replace('/', '')}.epub".replace(
        ":", ""
    )
    epub.write_epub(epub_file_name, book, {})
    uploaded = upload_to_dbox(epub_file_name)
    # Post notification to local ntfy
    if uploaded:
        session.post(
            "http://localhost/wct",
            data=f"Uploaded {epub_file_name} to dropbox!".encode(encoding="utf-8"),
            headers={
                "Title": "WCT",
                "Priority": "4"
            }
        )
def _get_soup(url):
    """Fetch *url* and parse it with html5lib; return None on a non-200."""
    res = session.get(url)
    if res.status_code != 200:
        return None
    return BeautifulSoup(res.content.decode("utf-8"), "html5lib")


def get_widget_area(url):
    """Return the page's ``.widget-area`` element (sidebar), or None."""
    soup = _get_soup(url)
    return soup.select_one(".widget-area") if soup is not None else None


def get_entry_content(url):
    """Return the page's ``.entry-content`` element (post body), or None."""
    soup = _get_soup(url)
    return soup.select_one(".entry-content") if soup is not None else None


def get_entry_title(url):
    """Return the page's ``.entry-title`` element (post title), or None."""
    soup = _get_soup(url)
    return soup.select_one(".entry-title") if soup is not None else None
def main():
    """Find the newest chapter (or the manual url) and publish it as an epub."""
    widget_area = get_widget_area(site_url)
    recent_posts_section = widget_area.select_one("section.recent-posts-extended")
    recent = recent_posts_section.select_one(".rpwe-title")
    a_tag = recent.findChild()  # the <a> holding the newest post's link
    chapter_url = chapter_url_manual or a_tag.get("href")

    # Remember the last processed chapter so reruns are no-ops.
    filename = "./latest_chapter.txt"
    if not os.path.exists(filename):
        open(filename, "w").close()
    # Named state_file (not f) so the image-writing loop below cannot
    # shadow the handle.
    with open(filename, "r+") as state_file:
        latest_chapter_url = state_file.readline()
        if latest_chapter_url != chapter_url or chapter_url_manual:
            if not chapter_url_manual:
                # NOTE(review): the url is recorded before the epub is built,
                # so a failure below still marks the chapter as processed.
                state_file.seek(0)
                state_file.write(chapter_url)
                state_file.truncate()
            entry_content = get_entry_content(chapter_url)
            entry_title = get_entry_title(chapter_url).text
            # Cover: prefer an <img> inside the first <p>; otherwise walk
            # siblings until one carries an <img> (or a <p> ends the search).
            p_image = entry_content.find("p").select_one("img")
            if p_image is None:
                next_sibling = entry_content.find("p").find_next_sibling()
                while (
                    next_sibling.name != "p" and next_sibling.findChild("img") is None
                ):
                    next_sibling = next_sibling.find_next_sibling()
                p_image = next_sibling.findChild()
            cover_url = p_image.get("src") if p_image is not None else None
            # Download images referenced in the first 10 top-level nodes and
            # rewrite their src to local paths for embedding in the epub.
            images = []
            for idx, tag in enumerate(entry_content.contents[:10]):
                # contents mixes Tags and NavigableStrings: Tag.find returns
                # a Tag/None, while str.find returns an int (-1 when absent),
                # hence the combined truthiness + != -1 check.
                found = tag.find("img")
                if found and found != -1:
                    image = found
                    image_url = image.get("src")
                    resp = session.get(image_url, stream=True)
                    image_path = f"images/local_image_{idx}.jpg"
                    os.makedirs(os.path.dirname(image_path), exist_ok=True)
                    with open(image_path, "wb") as image_file:
                        image_file.write(resp.content)
                    images.append(image_path)
                    image["src"] = image_path
            content = "".join([str(tag) for tag in entry_content.contents])
            create_epub(entry_title, chapter_url, content, images, cover_url)
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment