view grok_interview/async.py @ 54:b3e82d22f961

[PostDog] Initial commit BROKEN
author June Park <parkjune1995@gmail.com>
date Fri, 19 Dec 2025 13:58:52 -0800
parents 68fa88ac73fe
children
line wrap: on
line source


# 1. Asynchronously download a list of URLs
# 2. Retry failed downloads



import asyncio
from concurrent.futures import ThreadPoolExecutor
import random
from typing import List
import time

async def interface_download_url(url: str, retry_number=0):
    """Download ``url`` via ``func_download_url``, retrying on failure.

    Args:
        url: The URL to download.
        retry_number: Number of retries already consumed for this URL.
            Retries continue until this count reaches ``ALLOWED_RETRY_SIZE``.

    Returns:
        A tuple ``(success, res, url, elapsed)`` where ``success`` tells
        whether any attempt succeeded, ``res`` is the value returned by
        ``func_download_url`` (``None`` on failure), ``url`` is the input
        URL, and ``elapsed`` is the non-negative number of seconds spent
        across all attempts made by this call.
    """
    # Monotonic clock: immune to wall-clock adjustments while we wait.
    started = time.monotonic()
    attempt = retry_number
    while True:
        try:
            res = await func_download_url(url)
        except Exception:
            # ``Exception`` (not a bare ``except``) so task cancellation and
            # KeyboardInterrupt still propagate to the caller.
            if attempt >= ALLOWED_RETRY_SIZE:
                return False, None, url, time.monotonic() - started
            attempt += 1
        else:
            return True, res, url, time.monotonic() - started
    
async def func_download_url(url: str):
    """Stand-in for a real download.

    Ignores ``url`` and simply sleeps for a random whole number of seconds
    between 1 and 10 (inclusive) without blocking the event loop.
    """
    delay_seconds = random.randint(1, 10)
    await asyncio.sleep(delay_seconds)

# Maximum number of URLs downloaded concurrently in one batch.
ALLOWED_BATCH_SIZE = 10
# Maximum number of retry passes over URLs that failed to download.
ALLOWED_RETRY_SIZE = 3


async def download_multi_urls(urls: List[str], retry_num=0):
    """Download ``urls`` concurrently in batches, retrying the failures.

    The URLs are split into consecutive batches of at most
    ``ALLOWED_BATCH_SIZE``. Each batch is downloaded concurrently through
    ``interface_download_url`` and awaited before the next batch starts.
    URLs whose result reports failure are collected and passed to another
    call of this function with ``retry_num + 1``.

    Args:
        urls: The URLs to download.
        retry_num: Index of the current retry pass (0 for the first pass).
            Nothing is done once it exceeds ``ALLOWED_RETRY_SIZE``.

    Returns:
        None. URLs that still fail after the last allowed pass are dropped.
    """
    if retry_num > ALLOWED_RETRY_SIZE or not urls:
        return

    errors = []
    # Stepping by the batch size avoids the empty trailing batch that
    # ``len(urls) // size + 1`` yields when the length is an exact multiple.
    for start in range(0, len(urls), ALLOWED_BATCH_SIZE):
        batch = urls[start:start + ALLOWED_BATCH_SIZE]
        # We are already inside a running event loop, so the coroutines are
        # awaited with gather(); asyncio.run() cannot be used here.
        results = await asyncio.gather(
            *(interface_download_url(url) for url in batch)
        )
        # Each result is (success, res, url, elapsed); keep the failed URLs.
        errors.extend(result[2] for result in results if not result[0])

    if errors:
        await download_multi_urls(errors, retry_num + 1)