annotate grok_interview/async.py @ 60:d64a8c189a77

Merged
author June Park <me@mrjunejune.com>
date Sat, 20 Dec 2025 13:56:01 -0500
parents 68fa88ac73fe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
51
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
1
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
2 # 1. asynciously down list of urls
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
3 # 2. retries
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
4
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
5
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
6
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
7 import asyncio
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
8 from concurrent.futures import ThreadPoolExecutor
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
9 import random
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
10 from typing import List
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
11 import time
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
12
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
13 async def interface_download_url(url: str, retry_number = 0):
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
14 curr = time.time()
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
15 success = False
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
16 try:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
17 res = await func_download_url(url)
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
18 success = True
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
19 except:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
20 res = await interface_download_url(url, retry_number + 1)
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
21 res = None
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
22 long = curr - time.time()
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
23 return success, res, url, long
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
24
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
25 async def func_download_url(url: str):
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
26 await asyncio.sleep(random.randint(1, 10))
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
27
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
28 ALLOWED_BATCH_SIZE = 10
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
29 ALLOWED_RETRY_SIZE = 3
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
30 async def download_multi_urls(urls: List[str], retry_num = 0):
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
31 if retry_num > ALLOWED_RETRY_SIZE:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
32 return
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
33
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
34 number_of_batch = len(urls) // ALLOWED_BATCH_SIZE + 1
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
35
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
36 errors = []
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
37 for batch_num in range(number_of_batch):
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
38 download_urls = urls[batch_num * ALLOWED_BATCH_SIZE:(batch_num + 1) * ALLOWED_BATCH_SIZE]
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
39 for download_url in download_urls:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
40 asyncio.run(asyncio.create_task(interface_download_url(download_url)))
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
41 results = await asyncio.gather(*tasks)
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
42 for task in tasks
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
43 asyncio.run()
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
44 for result in results:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
45 if not result[0]:
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
46 errors.append(result[2])
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
47
68fa88ac73fe Interview prep for xAI
June Park <parkjune1995@gmail.com>
parents:
diff changeset
48 await download_multi_urls(errors, retry_num+1)