Mercurial
annotate asyncio_threads/inference/main.py @ 71:75de5903355c
Giagantic changes that update Dowa library to be more align with stb style array and hashmap. Updated Seobeo to be caching on server side instead of file level caching. Deleted bunch of things I don't really use.
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Sun, 28 Dec 2025 20:34:22 -0800 |
| parents | 46daba6e3cf4 |
| children |
| rev | line source |
|---|---|
|
48
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
1 from typing import List, Dict, Any, Optional |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
2 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
3 class UserRequest: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
4 """Represents an incoming user request.""" |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
5 def __init__(self, request_id: int, prompt: str): |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
6 self.request_id = request_id |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
7 self.prompt = prompt |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
8 self.output_tokens: List[str] = [] # Stores tokens as they are generated |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
9 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
10 def __repr__(self): |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
11 return f"Request(ID={self.request_id}, Prompt='{self.prompt[:20]}...', Tokens={len(self.output_tokens)})" |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
12 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
13 # --- Mock LLM Interface --- |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
14 def mock_inference_call(prompts: List[str], batch_id: int) -> Dict[str, Any]: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
15 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
16 Simulates the call to the underlying LLM/GPU. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
17 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
18 In a real scenario, this returns generated tokens and associated metadata. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
19 For this mock, we return a flat list of tokens, one for each request |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
20 in the batch, and the batch ID for verification. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
21 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
22 The length of the tokens list MUST equal the length of the prompts list. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
23 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
24 print(f" [INFERENCE] Running Batch {batch_id} with {len(prompts)} requests...") |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
25 results = { |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
26 'batch_id': batch_id, |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
27 # Simulate generating a single new token for each request in the batch |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
28 'generated_tokens': [ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
29 f"token_{i+1}_of_{batch_id}" for i, _ in enumerate(prompts) |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
30 ] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
31 } |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
32 return results |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
33 # ------------------------- |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
34 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
35 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
36 class BatchInferenceEngine: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
37 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
38 A simplified inference engine that handles request batching and |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
39 token-level result distribution. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
40 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
41 def __init__(self, batch_size: int = 4): |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
42 self.batch_size = batch_size |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
43 self.request_queue: List[UserRequest] = [] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
44 self.next_batch_id = 1 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
45 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
46 def enqueue_request(self, request: UserRequest) -> None: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
47 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
48 Adds a new request to the queue and triggers batch processing if |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
49 the batch size is reached. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
50 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
51 # --- YOUR CODE HERE --- |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
52 # 1. Add the request to the queue. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
53 # 2. Check if the queue size meets or exceeds self.batch_size. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
54 # 3. If so, call self._process_batch(). |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
55 # ---------------------- |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
56 self.request_queue.append(request) |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
57 while len(self.request_queue) > self.batch_size: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
58 self._process_batch() |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
59 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
60 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
61 def _process_batch(self) -> None: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
62 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
63 Executes the inference call for the current batch and distributes |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
64 the results back to the individual requests. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
65 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
66 batch = self.request_queue[:self.batch_size] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
67 prompts = map(lambda x : x.prompt, batch) |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
68 results = mock_inference_call(list(prompts), self.next_batch_id) |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
69 for request in self.request_queue: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
70 request.output_tokens = results["generated_tokens"] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
71 self.request_queue[self.batch_size:] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
72 self.next_batch_id += 1 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
73 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
74 def get_results(self, request_id: int) -> Optional[List[str]]: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
75 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
76 In a real system, this would retrieve results from a separate |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
77 completed-requests store. For this mock, assume we can only |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
78 retrieve results for requests that have been fully processed and |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
79 are no longer in the queue. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
80 """ |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
81 # For simplicity, assume all requests that have been processed |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
82 # by a batch call have completed their generation for this *single step* |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
83 # of the mock. If you want to make this more realistic, feel free to |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
84 # expand the UserRequest class to include a 'is_complete' flag. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
85 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
86 # For the provided mock structure, we'll just check the queue: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
87 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
88 for req in self.request_queue: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
89 if req.request_id == request_id: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
90 # If it's still in the queue, it hasn't been processed yet |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
91 return None |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
92 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
93 # In a complete system, you'd look up the request ID in a completed-requests map. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
94 # For this simplified version, let's just return a simulated result for |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
95 # requests that *would* have been processed: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
96 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
97 # If the request ID is less than the ID of the requests that would be |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
98 # processed in the *next* batch, we simulate a complete token output. |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
99 if request_id < self.next_batch_id * self.batch_size: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
100 # Simple simulation: |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
101 return [f"token_X_of_{batch_num}" for batch_num in range(1, self.next_batch_id)] |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
102 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
103 return None |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
104 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
105 |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
106 def main(): |
|
46daba6e3cf4
Few python scrtips to show how to use asychio.
MrJuneJune <me@mrjunejune.com>
parents:
diff
changeset
|
107 pass |