# LoboLofi/DownloadBerserk.py

## DownloadBerserk.py
from bs4 import BeautifulSoup
import requests
import re
import sys

def download( fileUri, filename ):
  '''
  Download the resource at fileUri with an HTTP GET and save it to filename.

  The response body is streamed to disk in chunks instead of being held in
  memory as a whole. Redirects are followed.

  fileUri:  URL of the file to fetch.
  filename: path of the local file to write; it is opened in binary write
            mode, so an existing file is overwritten.

  If the server answers with an error status, nothing is written and a
  message is printed instead, so an error page is never saved under an
  image name.
  '''
  print("Download: " + fileUri )
  # stream=True defers fetching the body until iter_content() is consumed;
  # the with-block releases the connection when done.
  with requests.get(fileUri, allow_redirects=True, stream=True) as r:
    if not r.ok:
      print("Download failed (HTTP " + str(r.status_code) + "): " + fileUri )
      return
    # The with-block closes the file even if the transfer fails midway.
    with open( filename, 'wb' ) as out:
      for chunk in r.iter_content(chunk_size=65536):
        out.write(chunk)

def getUrlFrom(url, tag, att, http="https" ):
  '''
  Fetch the HTML page at url and collect one attribute from matching tags.

  Only tags named tag whose att attribute starts with "<http>://" are
  kept. The attribute values are returned as a list, in the order the
  parser yields the tags, with trailing whitespace removed.
  '''
  page = requests.get(url, allow_redirects=True)
  document = BeautifulSoup( page.content, features="lxml" )
  # Anchor at the start of the value so only the requested scheme matches.
  scheme = re.compile("^"+http+"://")
  return [
    element.get( att ).rstrip()
    for element in document.findAll(tag, attrs={att: scheme})
  ]

def filterPngFiles(link):
  '''
  Tell whether link ends with the wanted image extension.

  Returns True when the tail of link equals '.png' and False otherwise.
  Change the suffix below if the images you want are e.g. jpg files.
  '''
  wanted_suffix = '.png'
  return link[-len(wanted_suffix):] == wanted_suffix

def downloadManga(path, num, pathObjetive):
  '''
  Download every .png image referenced by the chapter page at path.

  First, all <img> src links that start with https are collected from the
  page and filtered with filterPngFiles. If nothing is left, the page is
  scanned again for links that start with http, filtered the same way.
  Last, each remaining image is downloaded.

  path:         URL of the chapter page.
  num:          chapter number, used as the first part of each file name.
  pathObjetive: directory the images are written into (this function does
                not create it).

  Files are named "<num>-<page>.png", both numbers zero-padded to three
  digits, with pages counted from 1 in list order.
  '''
  print("Download from: " + path)
  listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src')))

  if not listImg:
    # Sometimes the images are not served over https but over http, so
    # retry with that scheme. The fallback list must go through the same
    # filter; otherwise every http image on the page would be downloaded.
    listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src', 'http')))

  # The remote names are very likely all different, so standardize them:
  # num is the chapter number and page is the page number.
  # The saved name always ends in .png, matching the extension filtered on.
  for page, im in enumerate(listImg, start=1):
    download(im, pathObjetive+"/"+'{0:03d}'.format(num)+"-"+'{0:03d}'.format(page)+".png")

# Command line: <script> <chapter URL prefix> <output directory>
# e.g. 'https://readberserk.com/chapter/berserk-chapter-' and 'berserk'.
# Chapters 1 to 357 are fetched in order; the chapter number is zero-padded
# to three digits and appended to the URL prefix, followed by a slash.
for chapter in range(1,358):
  chapterUrl = sys.argv[1] + format(chapter, '03d') + '/'
  downloadManga(chapterUrl, chapter, sys.argv[2])
	from bs4 import BeautifulSoup
	import requests
	import re
	import sys

	def download( fileUri, filename ):
	  '''
	  Download the resource at fileUri with an HTTP GET and save it to filename.

	  The response body is streamed to disk in chunks instead of being held in
	  memory as a whole. Redirects are followed.

	  fileUri:  URL of the file to fetch.
	  filename: path of the local file to write; it is opened in binary write
	            mode, so an existing file is overwritten.

	  If the server answers with an error status, nothing is written and a
	  message is printed instead, so an error page is never saved under an
	  image name.
	  '''
	  print("Download: " + fileUri )
	  # stream=True defers fetching the body until iter_content() is consumed;
	  # the with-block releases the connection when done.
	  with requests.get(fileUri, allow_redirects=True, stream=True) as r:
	    if not r.ok:
	      print("Download failed (HTTP " + str(r.status_code) + "): " + fileUri )
	      return
	    # The with-block closes the file even if the transfer fails midway.
	    with open( filename, 'wb' ) as out:
	      for chunk in r.iter_content(chunk_size=65536):
	        out.write(chunk)

	def getUrlFrom(url, tag, att, http="https" ):
	  '''
	  Fetch the HTML page at url and collect one attribute from matching tags.

	  Only tags named tag whose att attribute starts with "<http>://" are
	  kept. The attribute values are returned as a list, in the order the
	  parser yields the tags, with trailing whitespace removed.
	  '''
	  r = requests.get(url, allow_redirects=True)
	  soup = BeautifulSoup( r.content, features="lxml" )
	  ret = []
	  # The pattern is anchored at the start so only the requested scheme matches.
	  for link in soup.findAll(tag, attrs={att: re.compile("^"+http+"://")}):
	    ret.append( link.get( att ).rstrip() )
	  return ret

	def filterPngFiles(link):
	  '''
	  Tell whether link ends with the wanted image extension.

	  Returns True when the last four characters of link are '.png' and False
	  otherwise. Change the extension below if you want e.g. jpg files.
	  '''
	  extension = '.png'
	  return link[-4:] == extension

	def downloadManga(path, num, pathObjetive):
	  '''
	  Download every .png image referenced by the chapter page at path.

	  First, all <img> src links that start with https are collected from the
	  page and filtered with filterPngFiles. If nothing is left, the page is
	  scanned again for links that start with http, filtered the same way.
	  Last, each remaining image is downloaded.

	  path:         URL of the chapter page.
	  num:          chapter number, used as the first part of each file name.
	  pathObjetive: directory the images are written into (this function does
	                not create it).

	  Files are named "<num>-<page>.png", both numbers zero-padded to three
	  digits, with pages counted from 1 in list order.
	  '''
	  print("Download from: " + path)
	  listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src')))

	  if not listImg:
	    # Sometimes the images are not served over https but over http, so
	    # retry with that scheme. The fallback list must go through the same
	    # filter; otherwise every http image on the page would be downloaded.
	    listImg = list(filter(filterPngFiles, getUrlFrom( path, 'img', 'src', 'http')))

	  # The remote names are very likely all different, so standardize them:
	  # num is the chapter number and page is the page number.
	  # The saved name always ends in .png, matching the extension filtered on.
	  for page, im in enumerate(listImg, start=1):
	    download(im, pathObjetive+"/"+'{0:03d}'.format(num)+"-"+'{0:03d}'.format(page)+".png")

	# Command line: <script> <chapter URL prefix> <output directory>
	# e.g. 'https://readberserk.com/chapter/berserk-chapter-' and 'berserk'.
	# Chapters 1 to 357 are fetched in order; the chapter number is zero-padded
	# to three digits and appended to the URL prefix, followed by a slash.
	for i in range(1,358):
	  downloadManga(sys.argv[1] + '{0:03d}'.format(i) + '/', i, sys.argv[2])
# No results found