Last active
October 6, 2020 23:56
-
-
Save LoboLofi/497d317c21bf8bb20a509a5a2b80d1e4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| import requests | |
| import re | |
| import sys | |
def download( fileUri, filename ):
    '''
    Download a file with an HTTP GET request and save it to disk.

    fileUri  -- URL of the file to fetch (redirects are followed).
    filename -- path of the local file to write; an existing file is
                overwritten.

    The response body is streamed to disk in chunks, so the whole file is
    never held in memory at once. When the server answers with an error
    status, nothing is written and a message is printed instead, so an
    HTML error page is never saved under an image name.
    '''
    print("Download: " + fileUri )
    # stream=True defers reading the body until iter_content is consumed.
    with requests.get(fileUri, allow_redirects=True, stream=True) as r:
        if not r.ok:
            print("Skipped (HTTP " + str(r.status_code) + "): " + fileUri)
            return
        # The with block guarantees the file is flushed and closed.
        with open( filename, 'wb' ) as out:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                out.write(chunk)
def getUrlFrom(url, tag, att, http="https" ):
    '''
    Download the HTML page at url and return the att attribute of every
    matching tag whose attribute value starts with "<http>://".

    url  -- address of the page to fetch (redirects are followed).
    tag  -- tag name to look for, for example 'img'.
    att  -- attribute to read from each tag, for example 'src'.
    http -- scheme the attribute value must start with; it is inserted
            as-is into a regular expression.

    Returns a list with the attribute values, trailing whitespace removed.
    '''
    r = requests.get(url, allow_redirects=True)
    soup = BeautifulSoup( r.content, features="lxml" )
    startsWithScheme = re.compile("^"+http+"://")
    # find_all is the current spelling of the deprecated findAll alias.
    return [
        node.get( att ).rstrip()
        for node in soup.find_all(tag, attrs={att: startsWithScheme})
    ]
def filterPngFiles(link, extension='.png'):
    '''
    Tell whether link ends with the wanted image extension.

    link      -- URL or file name to check.
    extension -- suffix to accept, or a tuple of suffixes such as
                 ('.png', '.jpg'). Defaults to '.png'.

    Returns True when link ends with extension, False otherwise.
    The comparison is case sensitive.
    '''
    # endswith handles a suffix of any length, unlike a fixed link[-4:] slice.
    return link.endswith(extension)
def downloadManga(path, num, pathObjetive):
    '''
    Download every png image referenced by the chapter page at path.

    path         -- URL of the chapter page.
    num          -- chapter number, used as the first part of each file name.
    pathObjetive -- directory where the images are saved; it is not created
                    here.

    First, all the img src links found on the page are collected.
    Next, they are filtered down to the png files.
    Last, each image is saved as <pathObjetive>/<num>-<page>.png, with both
    numbers zero padded to three digits.
    '''
    print("Download from: " + path)
    listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src')))
    if not listImg:
        # Sometimes the images are not served over https but over http, so
        # retry with that scheme. The result must go through the same png
        # filter; otherwise every http image on the page would be saved
        # with a .png name.
        listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src', 'http')))
    # The remote file names are usually inconsistent, so they are
    # standardized: num is the chapter number and page is the position of
    # the image on the page, starting at 1.
    for page, im in enumerate(listImg, start=1):
        download(im,pathObjetive+"/"+'{0:03d}'.format(num)+"-"+'{0:03d}'.format(page)+".png")
# Usage: <script> <chapter url prefix> <output directory>
# Example prefix: 'https://readberserk.com/chapter/berserk-chapter-'
# with 'berserk' as the output directory.
# Chapters 1 through 357 are fetched; the chapter number is appended to the
# prefix zero padded to three digits and followed by a slash.
for chapter in range(1, 358):
    chapterUrl = sys.argv[1] + '{0:03d}'.format(chapter) + '/'
    downloadManga(chapterUrl, chapter, sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment