Mercurial
diff grok_interview/async.py @ 60:d64a8c189a77
Merged
| author | June Park <me@mrjunejune.com> |
|---|---|
| date | Sat, 20 Dec 2025 13:56:01 -0500 |
| parents | 68fa88ac73fe |
| children |
line wrap: on
line diff
# 1. Asynchronously download a list of URLs, in fixed-size batches.
# 2. Retry the URLs that failed, a bounded number of times.

import asyncio
from concurrent.futures import ThreadPoolExecutor
import random
from typing import Any, Awaitable, Callable, List, Optional, Tuple
import time

# Maximum number of downloads that are awaited concurrently.
ALLOWED_BATCH_SIZE = 10
# Maximum number of retry rounds after the initial attempt.
ALLOWED_RETRY_SIZE = 3


async def func_download_url(url: str):
    """Stand-in downloader: sleep for a random 1-10 seconds and return None."""
    await asyncio.sleep(random.randint(1, 10))


async def interface_download_url(
    url: str,
    retry_number=0,
    *,
    download: Optional[Callable[[str], Awaitable[Any]]] = None,
) -> Tuple[bool, Any, str, float]:
    """Make one download attempt for *url* and report the outcome.

    Ordinary exceptions raised by the downloader are converted into a
    ``success=False`` result instead of propagating, so a whole batch can be
    gathered without one failure aborting the others.  Retrying is left to
    ``download_multi_urls`` so that the retry budget lives in one place.

    Args:
        url: The URL to download.
        retry_number: Retry round this attempt belongs to.  Accepted for
            call compatibility; it does not change the behaviour of the
            attempt.
        download: Coroutine function called as ``download(url)``.  Defaults
            to ``func_download_url``.

    Returns:
        ``(success, result, url, elapsed_seconds)``.  ``result`` is whatever
        the downloader returned, or ``None`` when the attempt failed.
    """
    fetch = func_download_url if download is None else download
    started = time.monotonic()
    try:
        res = await fetch(url)
    except Exception:
        # ``Exception`` (not a bare ``except``) so that task cancellation and
        # KeyboardInterrupt still propagate to the caller.
        success, res = False, None
    else:
        success = True
    elapsed = time.monotonic() - started
    return success, res, url, elapsed


async def download_multi_urls(
    urls: List[str],
    retry_num=0,
    *,
    download: Optional[Callable[[str], Awaitable[Any]]] = None,
) -> List[str]:
    """Download *urls* in batches, retrying the ones that fail.

    At most ``ALLOWED_BATCH_SIZE`` downloads run concurrently.  After every
    URL has been attempted, the failed ones are attempted again, for rounds
    ``retry_num`` through ``ALLOWED_RETRY_SIZE`` inclusive.

    Args:
        urls: URLs to download.
        retry_num: Round number to start counting from; a value greater than
            ``ALLOWED_RETRY_SIZE`` means no attempt is made at all.
        download: Coroutine function called as ``download(url)``.  Defaults
            to ``func_download_url``.

    Returns:
        The URLs that still failed once the retry budget was used up, in the
        order they were attempted (an empty list when everything succeeded).
    """
    pending = list(urls)
    attempt = retry_num
    while pending and attempt <= ALLOWED_RETRY_SIZE:
        failed: List[str] = []
        # Stepping by the batch size never produces an empty trailing batch.
        for start in range(0, len(pending), ALLOWED_BATCH_SIZE):
            batch = pending[start:start + ALLOWED_BATCH_SIZE]
            results = await asyncio.gather(
                *(
                    interface_download_url(item, attempt, download=download)
                    for item in batch
                )
            )
            failed.extend(
                failed_url
                for ok, _res, failed_url, _elapsed in results
                if not ok
            )
        pending = failed
        attempt += 1
    return pending