Creates an epub of the most recent post on aigreeny.com and uploads it to Dropbox.
import os
import re

import dropbox
import html5lib  # type: ignore  # parser backend used by BeautifulSoup below
import requests
from bs4 import BeautifulSoup
from ebooklib import epub

from config import app_folder, app_key, app_secret, oauth2_refresh_token
# For the refresh token:
# 1. Go to https://www.dropbox.com/oauth2/authorize?client_id=<APP_KEY>&token_access_type=offline&response_type=code
#    and get the authorization code.
# 2. Make a POST request to https://api.dropboxapi.com/oauth2/token with
#    Basic Auth (username = <APP_KEY>, password = <APP_SECRET>) and an
#    x-www-form-urlencoded body: code=<AUTHORIZATION_CODE>, grant_type=authorization_code
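#
# As a rough illustration, that token exchange could also be done with requests
# (the placeholder values below are not part of this script):
#
#   resp = requests.post(
#       "https://api.dropboxapi.com/oauth2/token",
#       auth=("<APP_KEY>", "<APP_SECRET>"),  # HTTP Basic Auth
#       data={"code": "<AUTHORIZATION_CODE>", "grant_type": "authorization_code"},
#   )
#   oauth2_refresh_token = resp.json().get("refresh_token")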

session = requests.session()

dbx = dropbox.Dropbox(
    app_key=app_key, app_secret=app_secret, oauth2_refresh_token=oauth2_refresh_token
)

site_url = "https://www.aigreeny.com/recent"
post_url = "https://www.aigreeny.com/post"

# Create the app folder on Dropbox if it does not exist yet.
try:
    dbx.files_get_metadata(app_folder)
except dropbox.exceptions.ApiError as e:
    if isinstance(e.error, dropbox.files.GetMetadataError):
        dbx.files_create_folder(app_folder)


def upload_to_dbox(epub_file_name):
    """Upload the generated epub to the Dropbox app folder."""
    try:
        with open(epub_file_name, "rb") as f:
            dbx.files_upload(f.read(), f"{app_folder}/{epub_file_name}")
        return True
    except dropbox.exceptions.ApiError as e:
        if isinstance(e.error, dropbox.files.UploadError):
            return False
        raise


def create_epub(title, url, content, images, cover_url=None):
    book = epub.EpubBook()
    cover = session.get(cover_url, stream=True) if cover_url else None
    chapter_slug = list(filter(None, url.split("/")))[-1]
    chapter_id = chapter_slug
    chapter_title = title
    book.set_identifier(chapter_id)
    book.set_title(chapter_title)
    book.set_language("en")
    if cover_url and cover:
        book.set_cover("image.jpg", cover.content)
        del cover
    spine = []
    chapter = epub.EpubHtml(
        title=chapter_title,
        file_name=f"{chapter_slug}.xhtml",
        lang="en",
        content=content,
    )
    book.add_item(chapter)
    spine.append(chapter)
    # Embed any downloaded images, then delete the local copies.
    for idx, image in enumerate(images):
        with open(image, "rb") as img_file:
            image_content = img_file.read()
        img = epub.EpubImage(
            uid=f"image_{idx}",
            file_name=image,
            content=image_content,
        )
        book.add_item(img)
        os.remove(image)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    style = "BODY {color: white;}"
    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=style,
    )
    book.add_item(nav_css)
    book.spine = ["nav", *spine]
    epub_file_name = f"{chapter_title.replace('/', '')}.epub".replace(":", "")
    epub.write_epub(epub_file_name, book, {})
    uploaded = upload_to_dbox(epub_file_name)
    # Post a notification to the local ntfy instance.
    if uploaded:
        session.post(
            "http://localhost/aigreeny",
            data=f"Uploaded {epub_file_name} to dropbox!".encode(encoding="utf-8"),
            headers={"Title": "AI GREENY", "Priority": "4"},
        )


def get_soup(url):
    res = session.get(url)
    if res.status_code != 200:
        return None
    page = res.content
    soup = BeautifulSoup(page.decode("utf-8"), "html5lib")
    return soup


def get_recent_posts():
    return get_soup(site_url).select_one("#recent-posts")


def get_chapter_list(soup):
    return soup.select(".translation-item")


def get_chapter_content(url):
    return get_soup(url).select_one(".post-container .post-content .article-content")


def get_arc_title(soup):
    return clean_string_content(soup.select_one(".folder-path").text)


def clean_string_content(text):
    # Strip any leftover folder-separator markup, unwanted punctuation,
    # and runs of whitespace.
    text = re.sub(r'\s*<span class="folder-separator">.*?</span>\s*', " ", text, flags=re.DOTALL)
    text = re.sub(r"[^\w\s\-\.\,\&\:]", "", text, flags=re.UNICODE)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def parse_chapter_id(onclick):
    # The chapter id is embedded in an onclick handler like "openPost(123)".
    match = re.search(r"^openPost\((\d+)\)$", onclick.strip())
    if match:
        try:
            return int(match.group(1))
        except ValueError:
            return None
    return None


def main():
    recent_posts = get_recent_posts()
    chapter_list = get_chapter_list(recent_posts)
    latest_chapter_entry = chapter_list[0] if len(chapter_list) > 0 else None
    if latest_chapter_entry is None:
        print("latest_chapter_entry is None")
        return
    latest_chapter_id = parse_chapter_id(latest_chapter_entry.get("onclick"))
    latest_chapter_title = latest_chapter_entry.select_one(".translation-title").text
    latest_chapter_url = f"{post_url}/{latest_chapter_id}"
    arc_title = get_arc_title(latest_chapter_entry)
    # Remember the last chapter we processed so reruns don't re-upload it.
    filename = "./latest_chapter.txt"
    if not os.path.exists(filename):
        open(filename, "w").close()
    with open(filename, "r+") as f:
        saved_latest_chapter_url = f.readline()
        if saved_latest_chapter_url != latest_chapter_url:
            f.seek(0)
            f.write(latest_chapter_url)
            f.truncate()
            entry_content = get_chapter_content(latest_chapter_url)
            content = "".join(str(tag) for tag in entry_content.contents)
            create_epub(
                f"{arc_title} - {latest_chapter_title}",
                latest_chapter_url,
                content,
                [],
            )


if __name__ == "__main__":
    main()