-
-
Save darwing1210/c9ff8e3af8ba832e38e6e6e347d9047a to your computer and use it in GitHub Desktop.
import os
import asyncio

import aiohttp  # pip install aiohttp
import aiofile  # pip install aiofile

# Root folder for report artifacts; downloaded files go in its "files" subfolder.
REPORTS_FOLDER = "reports"
FILES_PATH = os.path.join(REPORTS_FOLDER, "files")


def download_files_from_report(urls, *, max_concurrent=5):
    """Download every URL in *urls* concurrently into ``FILES_PATH``.

    A single ``aiohttp.ClientSession`` is shared by all requests, and at most
    *max_concurrent* requests are in flight at any time. Each file is saved
    under the name that follows the last ``/`` of its URL (a query string, if
    present, stays part of the name).

    This is a synchronous entry point: it starts its own event loop with
    ``asyncio.run`` and therefore must not be called from code that is
    already running inside an event loop.

    Args:
        urls: Iterable of URL strings to download.
        max_concurrent: Upper bound on simultaneous HTTP requests.

    Raises:
        aiohttp.ClientResponseError: If a server answers with a 4xx/5xx status.
        aiohttp.ClientError: On other transport-level failures.
    """
    os.makedirs(FILES_PATH, exist_ok=True)

    async def fetch_file(session, sema, url):
        fname = url.split("/")[-1]

        # The semaphore only throttles the network part of the job.
        async with sema:
            async with session.get(url) as resp:
                # Explicit check instead of ``assert``: assertions are
                # stripped when Python runs with -O.
                resp.raise_for_status()
                data = await resp.read()

        # The body is fully buffered at this point, so the connection and
        # the semaphore slot are already released while writing to disk.
        async with aiofile.async_open(
            os.path.join(FILES_PATH, fname), "wb"
        ) as outfile:
            await outfile.write(data)

    async def main():
        # Created inside the coroutine so the semaphore belongs to the loop
        # that asyncio.run() starts, not to some previously existing loop.
        sema = asyncio.BoundedSemaphore(max_concurrent)
        async with aiohttp.ClientSession() as session:
            tasks = [fetch_file(session, sema, url) for url in urls]
            await asyncio.gather(*tasks)

    # asyncio.run() creates a fresh loop and closes it afterwards, so the
    # function can be called repeatedly without hitting a closed loop.
    asyncio.run(main())
You should refactor to reuse the session instead of creating one for each request.
applied suggestions
Very elegant solution but after experimenting with it for a while, I have started getting errors while downloading files (never the same file and if you try enough times, the downloads for all files succeed):
aiohttp.client_exceptions.ClientPayloadError: Response payload is not completed
Is the solution to change the code to catch that exception and try N times before giving up or is the root cause known?
Very elegant solution but after experimenting with it for a while, I have started getting errors while downloading files (never the same file and if you try enough times, the downloads for all files succeed):
aiohttp.client_exceptions.ClientPayloadError: Response payload is not completed
Is the solution to change the code to catch that exception and try N times before giving up or is the root cause known?
Please check the aiohttp documentation about ClientPayloadError. And yes, you can use aiohttp-retry to handle the failure cases.
Does this not work for video files? I am getting error 403.
Great piece of code!