Skip to content

Instantly share code, notes, and snippets.

@itcrab
Last active June 26, 2016 17:25
Show Gist options
  • Select an option

  • Save itcrab/3b8538a29cf3f8d16960d58dd6aecd81 to your computer and use it in GitHub Desktop.

Select an option

Save itcrab/3b8538a29cf3f8d16960d58dd6aecd81 to your computer and use it in GitHub Desktop.
Script for backing up large files (more than 10 GB) to Yandex.Disk through WebDAV (10 GB is the limit as of 06.2016).
#!/usr/bin/python3
import argparse
import os
import re
import sys
import subprocess
from datetime import datetime
def get_timestamp():
    """Return the current local time as a ``YYYY-MM-DD__HH-MM-SS`` string."""
    now = datetime.now()
    return now.strftime('%Y-%m-%d__%H-%M-%S')
def convert_to_megabytes(number):
    """Multiply ``number`` by 1024 * 1024 and return the product.

    Despite the name, the script passes in sizes given in megabytes
    (``--buffer`` / ``--part``) and uses the result as a size in bytes.
    """
    return number * (1024 * 1024)
def get_args():
    """Parse the command-line arguments of the uploader.

    Returns:
        argparse.Namespace: parsed options with the attributes ``oauth``,
        ``login``, ``password``, ``directory``, ``server``, ``client``,
        ``type``, ``extension``, ``count``, ``buffer`` and ``part``.
        ``count``, ``buffer`` and ``part`` are integers.
    """
    # The epilog is assembled line by line: inside a plain triple-quoted
    # string a backslash followed by a newline is a line continuation, so the
    # trailing backslashes of the example command would silently disappear.
    epilog = '\n'.join((
        '# How to use?',
        '- copy this script into /bin;',
        '- make symbolic link from `ya-disk-large-file-backuper.py` to',
        '  `ya-disk-large-file-backuper`;',
        '- enjoy!',
        '',
        '# Command for example (build from stream parts by 5GB):',
        'time tar cf - ./site.ru | \\',
        '    ya-disk-large-file-backuper -oa OAuth_Key \\',
        '        -l login \\',
        '        -p password \\',
        '        -d backup \\',
        '        -s s000001 \\',
        '        -c site.ru \\',
        '        -t files \\',
        '        -e .tar \\',
        '        --count 2 \\',
        '        --buffer 64 \\',
        '        --part 5120',
    ))
    parser = argparse.ArgumentParser(
        description='Yandex.Disk large file uploader through WebDAV.',
        # Keep the epilog's line breaks; the default formatter re-wraps it
        # into a single paragraph, which makes the example unreadable.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument('-oa', '--oauth', dest='oauth', required=True,
                        help='OAuth key for Yandex.Disk.')
    parser.add_argument('-l', '--login', dest='login', required=True,
                        help='Login for Yandex.Disk.')
    parser.add_argument('-p', '--password', dest='password', required=True,
                        help='Password for Yandex.Disk.')
    parser.add_argument('-d', '--directory', dest='directory',
                        default='backup',
                        help='Directory for start backup path.')
    parser.add_argument('-s', '--server', dest='server', required=True,
                        help='Server name. Using in backup path.')
    parser.add_argument('-c', '--client', dest='client', required=True,
                        help='Client name. Using in backup path.')
    parser.add_argument('-t', '--type', dest='type', required=True,
                        help='Type of backup. Using in backup path.')
    parser.add_argument('-e', '--extension', dest='extension', required=True,
                        help='Extension for output file.')
    parser.add_argument('--count', dest='count', default=7, type=int,
                        help='Count last directories for save before remove.')
    parser.add_argument('--buffer', dest='buffer', default=64, type=int,
                        help='Buffer size reading stream in MB. Default: 64.')
    parser.add_argument('--part', dest='part', default=5120, type=int,
                        help='Part file size in MB. Default: 5120')
    return parser.parse_args()
def get_path(directory, server, client, _type, is_full=False):
    """Build the remote backup path for one client and backup type.

    Args:
        directory: Top-level backup directory.
        server: Server name, second path component.
        client: Client name, third path component.
        _type: Backup type, fourth path component.
        is_full: When true, the timestamped directory name
            ``{client}__{type}__{TIMESTAMP}`` is appended.

    Returns:
        str: ``/{directory}/{server}/{client}/{type}/``, optionally followed
        by the timestamped directory name (without a trailing slash).
    """
    base = '/{directory}/{server}/{client}/{type}/'.format(
        directory=directory, server=server, client=client, type=_type
    )
    if not is_full:
        return base
    leaf = '{client}__{type}__{timestamp}'.format(
        client=client, type=_type, timestamp=TIMESTAMP
    )
    return base + leaf
def get_file_name(client, _type, extension):
    """Return the local/remote file name for this backup run.

    The name has the form ``{client}__{type}__{TIMESTAMP}{extension}``;
    ``extension`` is appended verbatim, so it should include its leading dot.
    """
    fields = {
        'client': client,
        'type': _type,
        'timestamp': TIMESTAMP,
        'extension': extension,
    }
    return '{client}__{type}__{timestamp}{extension}'.format(**fields)
def get_command(method, path, oauth, login, password):
    """Build the ``curl`` command line for one WebDAV operation.

    Args:
        method: ``'PROPFIND'``, ``'DELETE'``, ``'MKCOL'`` or the
            pseudo-method ``'UPLOAD'``.
        path: Remote path. For ``'UPLOAD'`` its last ``/``-separated
            component is also the name of the local file that is sent.
        oauth: OAuth key (not used for ``'UPLOAD'``).
        login: Account login for HTTP authentication.
        password: Account password for HTTP authentication.

    Returns:
        str: A shell command line. Every argument is shell-quoted, so values
        containing spaces or shell metacharacters (``$``, ``&``, ``;`` ...)
        are passed to ``curl`` literally; callers may still append shell
        redirections to the returned string.

    Raises:
        ValueError: If ``method`` is not one of the supported values.
    """
    import shlex  # local import: only this function needs it

    base_url = 'https://webdav.yandex.ru/'
    credentials = '{login}:{password}'.format(login=login, password=password)

    if method in ('PROPFIND', 'DELETE', 'MKCOL'):
        argv = [
            'curl', '-i',
            '--request', '{method} {path} HTTP/1.1'.format(
                method=method, path=path
            ),
            '--header', 'Host: webdav.yandex.ru',
        ]
        if method != 'MKCOL':
            argv += ['--header', 'Depth: 1']
        # NOTE(review): "Accept" and "Authorization" are sent as ONE header
        # value here, exactly as before. They look like they were meant to be
        # two headers -- confirm against the server before splitting them,
        # because authentication currently relies on --user.
        argv += [
            '--header', 'Accept: */* Authorization: OAuth {oauth}'.format(
                oauth=oauth
            ),
            '--user', credentials,
            base_url,
        ]
    elif method == 'UPLOAD':
        argv = [
            'curl',
            '-T', path.split('/')[-1],
            '--user', credentials,
            # base_url already ends with '/', so drop the path's leading
            # slash to avoid a '//' right after the host name.
            base_url + path.lstrip('/'),
        ]
    else:
        raise ValueError('Fatal error: command is blank!')

    return ' '.join(shlex.quote(arg) for arg in argv)
def run_command(cmd):
    """Run ``cmd`` through the shell with its standard output discarded.

    The appended redirections first point stderr at the current stdout and
    then send stdout to ``/dev/null``. The exit status is ignored and nothing
    is returned.
    """
    redirected = cmd + ' 2>&1 1>/dev/null'
    subprocess.call(redirected, shell=True)
def remove_old_backup(path, count, oauth, login, password):
    """Delete the first-listed backup entries under ``path`` on the server.

    A PROPFIND listing of ``path`` is fetched and every ``<d:href>`` value is
    collected. When more than ``count`` hrefs are returned, the hrefs from
    index 1 up to ``len(hrefs) + 1 - count`` (exclusive) are deleted and
    printed.

    Args:
        path: Remote directory whose entries are listed.
        count: Threshold for the number of hrefs in the listing.
        oauth: OAuth key passed through to ``get_command``.
        login: Account login passed through to ``get_command``.
        password: Account password passed through to ``get_command``.

    Raises:
        subprocess.CalledProcessError: If the listing command exits non-zero.
    """
    cmd_list = get_command('PROPFIND', path, oauth, login, password)
    raw = subprocess.check_output(cmd_list, shell=True)
    # Decode the response rather than taking str() of the bytes object, which
    # yields its repr ("b'...'" with escape sequences) instead of the text.
    listing = raw.decode('utf-8', errors='replace')
    hrefs = re.findall(r'<d:href>(.+?)</d:href>', listing)

    if len(hrefs) <= count:
        return

    # Index 0 is skipped -- presumably the listed collection itself, and the
    # server is presumed to list entries oldest first; TODO confirm.
    for href in hrefs[1:len(hrefs) + 1 - count]:
        cmd_remove = get_command('DELETE', href, oauth, login, password)
        run_command(cmd_remove)
        print(href)
def create_backup_path(path, oauth, login, password):
    """Create every level of ``path`` on the server, parent first.

    ``path`` is split on ``/``; empty components are skipped. For each
    remaining component the accumulated path (always ending in ``/``) is
    created with an MKCOL request.
    """
    current = '/'
    for segment in filter(None, path.split('/')):
        current += '{directory}/'.format(directory=segment)
        run_command(get_command('MKCOL', current, oauth, login, password))
def remove_file(output_file):
    """Delete ``output_file`` if it exists; do nothing otherwise."""
    if not os.path.exists(output_file):
        return
    os.remove(output_file)
def upload_file_part(backup_path, login, oauth, output_file, password):
    """Upload the local file ``output_file`` into ``backup_path`` on the server.

    Note the parameter order: ``login`` comes before ``oauth`` here, unlike
    in the other helpers of this script.
    """
    remote_file = '{path}/{file}'.format(path=backup_path, file=output_file)
    run_command(get_command('UPLOAD', remote_file, oauth, login, password))
def gen_next_file_part_name(output_file, part_number):
    """Return the file name to use for part ``part_number``.

    For part 1 a zero-padded suffix (``.001``) is appended to the name. For
    any other part number the text after the last dot is replaced with the
    zero-padded number, so ``name.tar.001`` becomes ``name.tar.002``.
    """
    if part_number == 1:
        return output_file + '.{:03}'.format(part_number)

    # Keep everything up to and including the last dot, swap the tail.
    stem, dot, _old_suffix = output_file.rpartition('.')
    return stem + dot + '{:03}'.format(part_number)
def append_file_part(data, output_file):
    """Append ``data`` to ``output_file`` in binary mode.

    Empty ``data`` is ignored, so no file is created or touched for it.
    """
    if len(data) == 0:
        return
    with open(output_file, 'ab') as part:
        part.write(data)
def build_parts_and_upload(backup_path, output_file, buffer_size, part_size,
                           oauth, login, password):
    """Split standard input into part files and upload each one.

    Standard input is read in chunks of ``buffer_size`` bytes and appended to
    the local file ``output_file``. Once that file reaches ``part_size`` bytes
    (or the input ends) it is uploaded into ``backup_path``, re-uploaded until
    ``is_file_uploaded`` confirms it, and then deleted locally. The first part
    keeps the name ``output_file``; later parts get the names produced by
    ``gen_next_file_part_name``.

    Args:
        backup_path: Remote directory that receives the parts.
        output_file: Local (and remote) name of the first part.
        buffer_size: Number of bytes to read from stdin per iteration.
        part_size: Size in bytes at which a part is considered complete.
        oauth: OAuth key passed through to the WebDAV helpers.
        login: Account login passed through to the WebDAV helpers.
        password: Account password passed through to the WebDAV helpers.
    """
    remove_file(output_file)
    part_number = 0
    with sys.stdin.buffer as fi:
        while True:
            data = fi.read(buffer_size)
            append_file_part(data, output_file)
            at_eof = len(data) == 0

            # At end of input there may be nothing left to send: the stream
            # was empty, or the previous part ended exactly on the part
            # boundary. Uploading a missing file can never succeed and would
            # make the retry loop below spin forever.
            has_part = os.path.exists(output_file)
            if has_part and (
                    at_eof or os.path.getsize(output_file) >= part_size):
                path_to_backup_file = '{path}/{file}'.format(
                    path=backup_path, file=output_file
                )
                # Retry until the server reports the part as present.
                is_uploaded = False
                while not is_uploaded:
                    upload_file_part(
                        backup_path, login, oauth, output_file, password
                    )
                    is_uploaded = is_file_uploaded(
                        path_to_backup_file, oauth, login, password
                    )
                remove_file(output_file)
                if not at_eof:
                    part_number += 1
                    output_file = gen_next_file_part_name(
                        output_file, part_number
                    )
            if at_eof:
                break
def is_file_uploaded(path, oauth, login, password):
    """Report whether ``path`` exists on the server.

    A PROPFIND request is issued for ``path``; the file counts as uploaded
    unless the response text contains ``404 Object Not Found``.

    Raises:
        subprocess.CalledProcessError: If the check command exits non-zero.
    """
    cmd_check = get_command('PROPFIND', path, oauth, login, password)
    response = subprocess.check_output(cmd_check, shell=True).decode('utf-8')
    return '404 Object Not Found' not in response
# Timestamp shared by get_path() and get_file_name(); taken once so that the
# remote directory and every part file of one run carry the same value.
TIMESTAMP = get_timestamp()

# Run the backup only when executed as a script, so importing this module
# neither parses sys.argv nor talks to the server.
if __name__ == '__main__':
    args = get_args()

    # --buffer and --part are given in MB; the helpers below work in bytes.
    BUFFER_SIZE = convert_to_megabytes(args.buffer)
    PART_SIZE = convert_to_megabytes(args.part)

    # PATH is the directory holding all runs; PATH_FULL is this run's
    # timestamped directory inside it.
    PATH = get_path(
        args.directory, args.server, args.client, args.type
    )
    PATH_FULL = get_path(
        args.directory, args.server, args.client, args.type, True
    )
    FILE_NAME = get_file_name(args.client, args.type, args.extension)

    # Old runs are pruned before the new run's directory is created.
    remove_old_backup(
        PATH, args.count, args.oauth, args.login, args.password
    )
    create_backup_path(
        PATH_FULL, args.oauth, args.login, args.password
    )
    build_parts_and_upload(
        PATH_FULL, FILE_NAME, BUFFER_SIZE, PART_SIZE,
        args.oauth, args.login, args.password
    )
@itcrab
Copy link
Author

itcrab commented Jun 26, 2016

Added a check that the file was uploaded (if not, the upload is restarted).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment