-
-
Save hannesdatta/10422a6fbb584f245c83361245335741 to your computer and use it in GitHub Desktop.
################################################################
# DOWNLOAD ENTIRE FOLDER STRUCTURE FROM DROPBOX TO LOCAL DRIVE #
################################################################

# Instructions:
# (1) Install the Dropbox API using pip
#     > pip install dropbox
# (2) Create an application to make requests to the Dropbox API
#     - Go to: https://dropbox.com/developers/apps
#     - Register your own App - e.g., call it "personal access to research data"
#     - Copy the secret *access token* after registering your app (click on get token)
#       Paste that access token to a file called *token_dropbox.txt*.
#       Make sure you do not version this file on Git, as it would allow others
#       to obtain data from your Dropbox. For example, you can add that file name
#       to .gitignore.

import dropbox
from get_dropbox import get_folders, get_files, wipe_dir

# Read the access token. The file is closed right after reading, and
# surrounding whitespace (e.g. the newline most editors append) is stripped
# so that it does not become part of the token.
with open('token_dropbox.txt') as token_file:
    access_token = token_file.read().strip()

# Authenticate with Dropbox
print('Authenticating with Dropbox...')
dbx = dropbox.Dropbox(access_token)
print('...authenticated with Dropbox owned by ' + dbx.users_get_current_account().name.display_name)

# (3) Obtain ID of folder that needs to be downloaded
#     folders = get_folders(), which generates a list with ID numbers for each folder
#     in your Dropbox (may take some time!!!)
#     Specify a path (if you know that path) for a directory "close" to your target
#     directory. Otherwise, this script will loop through the *entire* file structure
#     of your Dropbox, which will take a lot of time.
folders = get_folders(dbx, '/Tilburg/Projects/Project_Loyalty_Crises_Program/raw_data')

# Let's take a look at these folder IDs. They are printed explicitly because
# a bare `folders` expression only shows something in an interactive session.
for folder in folders:
    print(folder)

# Select target folder and copy desired folder ID below
folder_id = 'id:i34YqK3uj6IAAAAAAAJ3bQ'

# Set target download directory on your local computer; ends with (e.g., raw_data/)
download_dir = 'raw_data/'

##################
# DOWNLOAD FILES #
##################

# obtain list of files of target dir
print('Obtaining list of files in target directory...')
get_files(dbx, folder_id, download_dir)
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Wed Mar 4 06:31:48 2020 | |
| @author: hdatta | |
| """ | |
| import dropbox | |
| import os | |
| import shutil | |
# Find folder ID
def get_folders(dbx, folder):
    """List every folder below a Dropbox path together with its ID.

    Args:
        dbx: Dropbox client offering ``files_list_folder`` and
            ``files_list_folder_continue``.
        folder: Dropbox path at which the recursive listing starts.

    Returns:
        A list of strings of the form ``'<path_lower>--> <id>'``, one per
        folder entry, in the order the API returned them.
    """
    found = []
    page = dbx.files_list_folder(folder, recursive=True)
    while True:
        # Keep folder entries only; any other entry type is skipped.
        found.extend(
            item.path_lower + '--> ' + item.id
            for item in page.entries
            if isinstance(item, dropbox.files.FolderMetadata)
        )
        if not page.has_more:
            break
        # The listing is paginated: fetch the next page via the cursor.
        page = dbx.files_list_folder_continue(page.cursor)
    return found
def wipe_dir(download_dir):
    """Delete the local download directory and its contents (best effort).

    Args:
        download_dir: Path of the directory to remove.

    Returns:
        None. File-system errors (for example, the directory does not
        exist) are ignored on purpose so the call can be used
        unconditionally before a fresh download.
    """
    try:
        shutil.rmtree(download_dir)
    except OSError:
        # Best effort: a missing or undeletable directory is not fatal here.
        # Only OSError is swallowed, so Ctrl-C and programming errors still
        # propagate.
        pass
def get_files(dbx, folder_id, download_dir):
    """Download all files below a Dropbox folder into a local directory.

    The folder is listed recursively, and every file entry is downloaded to
    ``download_dir`` under its path relative to the listed folder. Local
    paths are built from each entry's ``path_lower``.

    Args:
        dbx: Dropbox client offering ``files_list_folder``,
            ``files_list_folder_continue`` and ``files_download_to_file``.
        folder_id: Dropbox folder ID; must start with ``'id:'``.
        download_dir: Local target directory, with or without a trailing
            ``'/'``.

    Raises:
        AssertionError: If ``folder_id`` does not start with ``'id:'`` or
            the first listed entry is not the requested folder.
        OSError: If a local target directory cannot be created.
    """
    assert folder_id.startswith('id:'), "folder_id must start with 'id:'"
    result = dbx.files_list_folder(folder_id, recursive=True)

    # determine highest common directory: the code relies on the first
    # listed entry being the requested folder itself.
    assert result.entries[0].id == folder_id, \
        'first listed entry is not the requested folder'
    common_dir = result.entries[0].path_lower

    # Collect the paths of all file entries across every page of the listing.
    file_list = []
    while True:
        file_list.extend(
            entry.path_lower
            for entry in result.entries
            if isinstance(entry, dropbox.files.FileMetadata)
        )
        if not result.has_more:
            break
        result = dbx.files_list_folder_continue(result.cursor)

    total = len(file_list)
    print('Downloading ' + str(total) + ' files...')

    # Strip at most one trailing '/' so that 'raw_data' and 'raw_data/'
    # both yield 'raw_data/<relative path>' below.
    base_dir = download_dir[:-1] if download_dir.endswith('/') else download_dir

    for i, fn in enumerate(file_list, 1):
        printProgressBar(i, total)
        path = base_dir + remove_prefix(fn, common_dir)
        # Create missing parent directories; an existing directory is fine,
        # while any other failure (e.g. permissions) is reported right here.
        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
        dbx.files_download_to_file(path, fn)
# auxiliary function to print iterations progress (from https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console)
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    """
    Draw a one-line progress bar on the terminal; meant to be called once
    per loop iteration.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : text shown before the bar (Str)
        suffix      - Optional  : text shown after the percentage (Str)
        decimals    - Optional  : number of decimals in the percentage (Int)
        length      - Optional  : width of the bar in characters (Int)
        fill        - Optional  : character used for the completed part (Str)
    """
    done_cells = int(length * iteration // total)
    todo_cells = length - done_cells
    bar_text = fill * done_cells + '-' * todo_cells
    pct_text = '{:.{places}f}'.format(100 * (iteration / float(total)), places=decimals)
    # Begin and end with a carriage return so the next call redraws this line.
    print('\r%s |%s| %s%% %s' % (prefix, bar_text, pct_text, suffix), end = '\r')
    if iteration == total:
        # Finished: move to a fresh line so later output does not overwrite the bar.
        print()
# inspired by https://stackoverflow.com/questions/16891340/remove-a-prefix-from-a-string and
# https://stackoverflow.com/questions/1038824/how-do-i-remove-a-substring-from-the-end-of-a-string-in-python
def remove_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix``; unchanged if it does not start with it."""
    if text.startswith(prefix):
        return text[len(prefix):]
    return text
def remove_suffix(text, suffix):
    """Return ``text`` without a trailing ``suffix``.

    ``text`` is returned unchanged when it does not end with ``suffix`` or
    when ``suffix`` is empty.
    """
    # An explicit branch is required: slicing with ``text[:-0]`` would yield
    # an empty string rather than the full text.
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text
| PASTE ACCESS TOKEN HERE |
Fixed - You need to set permissions for file content/metadata reading first, and after that create the access token.
I inserted file_list = sorted(file_list) at line 60 to make it easier to track how far the process went before failing (it sometimes fails if the connection is interrupted during large file transfers)
I've added a small check in get_files.py:72 to skip files that have already been downloaded. This makes it possible to continue where we stopped.
if not os.path.isfile(path):
dbx.files_download_to_file(path, fn)
Thanks for the code!
I used it and it works nicely for my own dropbox.
Now I'd like to use it to download all folders/files from another Dropbox, for which I got a shared link (about 100 large files, too large to be zipped, too large to copy to my own Dropbox). How do I need to adjust the code?
Help very much appreciated.
I'm having some issues running this script. Auth seems to work fine (No issues wrapping it in a try/ except and it's showing my username correctly), but as soon as it gets to get_folder it errors:
In my Dropbox App I've set permissions so the File metadata.read box is checked, and settings are saved.
I'm not sure if I have to include a scope in the Auth request, and if so, how to do that?
(I have to download a large Dropbox folder to a non-Dropbox folder on my external hard disk, and don't want to manually have to download a ZIP for the 100+ subfolders. Antwoord kan ook in het Nederlands, alvast bedankt!)