Skip to content

Instantly share code, notes, and snippets.

@r-a-o
Created October 8, 2024 08:03
Show Gist options
  • Select an option

  • Save r-a-o/14c865fff132f0cc685b70aa51ace027 to your computer and use it in GitHub Desktop.

Select an option

Save r-a-o/14c865fff132f0cc685b70aa51ace027 to your computer and use it in GitHub Desktop.
Upload multiple files and folders (retaining structure) to Cloudflare R2 via Boto3 (takes confirmation before upload, shows upload progress, has ignore list)
import boto3
from botocore.config import Config
from boto3.s3.transfer import TransferConfig, S3Transfer
import argparse
import os
import sys
import threading
# --- Configuration -----------------------------------------------------------
GB = 1024 ** 3  # bytes in one gibibyte
# Files larger than 1 GiB are uploaded with multipart transfer.
config = TransferConfig(multipart_threshold=1 * GB)

# Cloudflare R2 endpoint and credentials (fill in before running).
endpoint_url = 'https://XXX.r2.cloudflarestorage.com'
aws_access_key_id = 'XXX'
aws_secret_access_key = 'XXX'
bucket = 'YOUR_BUCKET'
root_dir = 'YOUR_ROOT_DIR'

# OS metadata files that should never be uploaded.
ignored_files_list = ['.DS_Store', 'Thumbs.db', 'desktop.ini']

# S3-compatible client pointed at Cloudflare R2 (SigV4 signing, region 'auto').
r2_client = boto3.client(
    's3',
    endpoint_url=endpoint_url,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    config=Config(signature_version='s3v4'),
    region_name='auto'
)
transfer_manager = S3Transfer(client=r2_client, config=config)
# Progress tracker
class ProgressPercentage:
    """Transfer callback that prints cumulative upload progress to stdout.

    An instance is passed as the ``callback`` to ``upload_file``; boto3
    invokes it (possibly from multiple transfer threads) with the number
    of bytes transferred since the previous invocation.
    """

    def __init__(self, file_path, current_file_index, total_files):
        # Capture the size up front so progress can be shown as a percentage.
        self._file_size = float(os.path.getsize(file_path))
        # boto3 may fire the callback concurrently from transfer threads.
        self._lock = threading.Lock()
        self._bytes_transferred = 0
        self.current_file_index = current_file_index
        self.total_files = total_files

    def __call__(self, bytes_transferred):
        with self._lock:
            self._bytes_transferred += bytes_transferred
            # Fix: a zero-byte file would raise ZeroDivisionError here;
            # report an empty file as immediately complete instead.
            if self._file_size > 0:
                percentage = (self._bytes_transferred / self._file_size) * 100
            else:
                percentage = 100.0
            sys.stdout.write(
                f"\r[{self.current_file_index}/{self.total_files}] "
                f"Upload Progress: {percentage:.2f}%"
            )
            sys.stdout.flush()
# Upload a single file
def upload_single_file(local_path, s3_key, current_file_index, total_files):
    """Upload one local file to the R2 bucket under ``s3_key``.

    Progress is reported through a ProgressPercentage callback that shows
    the position within the batch; any failure is caught and printed so a
    single bad file does not abort the whole run.
    """
    try:
        print(f"\nUploading {local_path} to bucket '{bucket}' as '{s3_key}'...")
        progress = ProgressPercentage(local_path, current_file_index, total_files)
        transfer_manager.upload_file(local_path, bucket, s3_key, callback=progress)
        print("\nFile uploaded successfully.")
    except Exception as e:
        print(f"\nError occurred during file upload: {e}")
# Get files inside folder
def get_files_to_upload(folder_path, ignored_files=None):
    """Recursively collect the paths of files under ``folder_path``.

    Args:
        folder_path: Directory to walk (subfolders included).
        ignored_files: Optional iterable of basenames to skip; defaults to
            the module-level ``ignored_files_list``. (Backward-compatible
            generalization — existing callers pass only ``folder_path``.)

    Returns:
        list[str]: Paths (joined onto ``folder_path``) of every non-ignored
        file, in ``os.walk`` order. Empty if the folder has no files.
    """
    if ignored_files is None:
        ignored_files = ignored_files_list
    skip = set(ignored_files)  # O(1) membership tests inside the walk
    files_to_upload = []
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if file in skip:  # Skip unwanted files
                continue
            files_to_upload.append(os.path.join(root, file))
    return files_to_upload
# Process upload
def process_upload(paths):
    """Resolve ``paths`` into (local, remote-key) pairs, confirm, and upload.

    A single file is keyed relative to its parent's parent, so one folder
    level is kept in the remote key; a folder is keyed relative to its own
    parent, so the selected folder's structure is mirrored under
    ``root_dir``. One y/n confirmation covers the whole batch.
    """
    planned = []
    for path in paths:
        if os.path.isfile(path):
            if os.path.basename(path) in ignored_files_list:
                print(f"Skipping ignored file: {path}")
                continue
            # Keep the file's immediate parent folder in the remote key.
            rel = os.path.relpath(path, start=os.path.dirname(os.path.dirname(path)))
            planned.append((path, f"{root_dir}/{rel}"))
        elif os.path.isdir(path):
            # Keep the selected folder itself in the remote key; sort for
            # a stable, alphabetical upload order.
            for file in sorted(get_files_to_upload(path)):
                rel = os.path.relpath(file, start=os.path.dirname(path))
                planned.append((file, f"{root_dir}/{rel}"))
        else:
            print(f"Error: '{path}' is not a valid file or folder.")

    if not planned:
        print("No valid files found for upload.")
        return

    # Show the full plan before asking for confirmation.
    print("\nThe following files will be uploaded:")
    for local_path, s3_key in planned:
        print(f"Local: {local_path} -> Remote: {bucket}/{s3_key}")

    # Single confirmation for all files
    proceed = input("\nProceed with uploading all these files? (y/n): ")
    if proceed.lower() != 'y':
        print("Upload aborted.")
        return

    total_files = len(planned)
    for index, (local_path, s3_key) in enumerate(planned, 1):
        upload_single_file(local_path, s3_key, index, total_files)
# Argument parser
if __name__ == '__main__':
    # CLI entry point: accept one or more file/folder paths to upload.
    arg_parser = argparse.ArgumentParser(
        description='Upload files or folders to Cloudflare R2 with progress reporting.'
    )
    arg_parser.add_argument('paths', nargs='+', help='Paths to the files or folders to be uploaded')
    cli_args = arg_parser.parse_args()
    # Call the upload function with the list of paths provided
    process_upload(cli_args.paths)
@r-a-o
Copy link
Author

r-a-o commented Oct 8, 2024

To run this py script, it needs a virtual env, run below command to create a venv

python -m venv .venv # This creates a new virtual env ".venv" in current dir
source .venv/bin/activate # This activates the new virtual env in your terminal
python -m pip install boto3 # This installs boto3

Once you have done the setup, update endpoint_url, aws_access_key_id, aws_secret_access_key, bucket & root_dir in the above file.

Run the following command for upload:

python upload_to_r2.py /first-folder /second-folder third-file.txt

The script then lists all the files inside /first-folder & /second-folder folders along with the individual file third-file.txt as files to upload, press y to continue with upload. The script maintains the folder structure of selected folder(s), so the remote will reflect your local file folder structure.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment