# PsiACE/s3fs-r2.py

## s3fs-r2.py
import pandas as pd
import numpy as np
# s3fs is installed from a source checkout so the build under test is the one imported:
# git clone https://github.com/fsspec/s3fs/
# cd s3fs
# pip install -e .
import s3fs

# Show which s3fs installation was actually picked up.
print(s3fs.__file__)

# Build a frame that serializes to a ~300MB Parquet file with compression='none'.
row_count = 18000000
df = pd.DataFrame({
    'A': np.random.randn(row_count),  # float64
    # dtype is passed explicitly: without it randint returns the platform
    # default integer, which is int64 on most systems rather than int32.
    'B': np.random.randint(0, 100, size=row_count, dtype=np.int32),  # int32
    'C': np.random.randn(row_count),  # float64
    'D': np.random.randint(0, 100, size=row_count, dtype=np.int32),  # int32
})

# r2 info -- placeholders; substitute real values locally and never commit them.
access_key_id = 'xxxx'
secret_access_key = 'yyyy'
endpoint_url = 'https://zzzz.r2.cloudflarestorage.com'

# fixed_upload_size=True is the option this script exercises against R2; see
# https://github.com/fsspec/s3fs/pull/888
fs = s3fs.S3FileSystem(
    key=access_key_id,
    secret=secret_access_key,
    client_kwargs={'endpoint_url': endpoint_url},
    fixed_upload_size=True,
)

# Destination, written as '<bucket>/<object key>'.
r2_path = 'bucket/file.parquet'

# Stream the Parquet bytes straight into the remote object; leaving the
# `with` block closes the file handle.
with fs.open(r2_path, 'wb') as f:
    df.to_parquet(f, engine='pyarrow', compression='none')  # ~300MB file

print("File uploaded successfully to Cloudflare R2.")
# No results found