# HG changeset patch
# User June Park
# Date 1766722246 28800
# Node ID 4bc56e88e1f30149566eabb06a5391f8f96bb3e8
# Parent  551d9fc0a2ba024e27ed147837aa01478d33bf24
Remove unnecessary files.

diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/LRU.py
--- a/grok_interview/LRU.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-from typing import Optional
-
-
-class Node:
-    def __init__(self, key, value):
-        self.key = key
-        self.value = value
-        self.next: Optional[Node] = None
-        self.prev: Optional[Node] = None
-
-class LRUCache:
-
-    def __init__(self, capacity: int):
-        self.capacity = capacity
-        self.map = {}
-        self.head = Node(-1, -1)
-        self.tail = Node(-1, -1)
-        self.head.next, self.tail.prev = self.tail, self.head
-
-    def get(self, key: int) -> int:
-        if not key in self.map:
-            return -1
-        node = self.map[key]
-        self._deleteAndAdd(node)
-        return node.value
-
-    def put(self, key: int, value: int) -> None:
-        newNode = Node(key, value)
-        if key in self.map:
-            self._delete(self.map[key])
-        else:
-            if len(self.map.keys())+1 > self.capacity:
-                del self.map[self.head.next.key]
-                self._delete(self.head.next)
-        self._add(newNode)
-        self.map[key] = newNode
-
-    def _deleteAndAdd(self, node):
-        self._delete(node)
-        self._add(node)
-
-    def _delete(self, node):
-        node.prev.next, node.next.prev = node.next, node.prev
-
-    def _add(self, node):
-        node.prev, node.next, self.tail.prev.next, self.tail.prev = self.tail.prev, self.tail, node, node
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/README.md
--- a/grok_interview/README.md	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-# Distributed Cache (Focus: Hashing & System Logic)
-
-- Question: Implement a Cache Router
-  - Level 1 (Easy - Modulo Hashing): Write a function that maps a key (string) to one of N servers.
-    - Task: Implement getServerIndex(key, serverCount).
-    - Expected Logic: Simple hash modulo: hash(key) % serverCount.
-    - Critique: The interviewer will ask, "What happens to the existing keys if we add one server (change N to N+1)?" (Answer: Almost all keys get remapped, causing a massive cache miss storm).
-  - Level 2 (Medium - Consistent Hashing): Solve the remapping problem. Implement a ConsistentHash class.
-    - Task: Map servers to points on a circle (0 to 360 degrees or a large integer range). Map keys to the same circle. A key belongs to the first server it encounters moving clockwise.
-    - Goal: Prove that adding a server only requires remapping the keys that fall between the new server and its predecessor.
-  - Level 3 (Hard - Virtual Nodes): The servers have uneven capacity, or the data distribution is skewed (some segments of the ring have way more keys).
-    - Task: Modify the class so that each physical server exists at multiple points on the ring (e.g., "Server A" is at positions 10, 150, and 290).
-    - Goal: Distribute load more evenly and handle "hot spots" better.
-
----
-
-Questions:
-
- - Async
- - Threads
- - DB
- - Cache
- - KV
- - Distribution
-
-
----
-
-FE
-
- - Fetching URL
- - Storing it as useMemo and what not.
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/alphabet_search.py
--- a/grok_interview/alphabet_search.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-# Given an m x n grid of characters board and a string word, return true if word exists in the grid.
-#
-# The word can be constructed from letters of sequentially adjacent cells, where adjacent cells are horizontally or vertically neighboring. The same letter cell may not be used more than once.
-#
-#
-#
-# Example 1:
-#
-#
-# Input: board = [
-#   ["A","B","C","E"],
-#   ["S","F","C","S"],
-#   ["A","D","E","E"]
-# ], word = "ABCCED"
-# Output: true
-
-def main(board, word):
-    ROWS = len(board)
-    COLS = len(board[0])
-
-    pos = 0
-
-    directions = [
-        (0, 1),
-        (1, 0),
-        (0, -1),
-        (-1, 0),
-    ]
-    def dfs(row, col, pos):
-        # import pdb; pdb.set_trace()
-        if pos == len(word):
-            return True
-
-        if (row < 0 or row >= ROWS) or (col < 0 or col >= COLS) or board[row][col] == "#":
-            return False
-
-        letter = board[row][col]
-        if letter == word[pos]:
-            pos += 1
-            board[row][col] = "#"
-            for direction in directions:
-                if dfs(row + direction[0], col + direction[1], pos):
-                    return True
-            board[row][col] = letter
-
-    for r in range(ROWS):
-        for c in range(COLS):
-            if dfs(r, c, 0):
-                return True
-    return False
-
-
-board = [["b"],["a"],["b"],["b"],["a"]]
-word = "baa"
-
-print(main(board, word))
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/async.py
--- a/grok_interview/async.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-
-# 1. asynciously down list of urls
-# 2. retries
-
-
-
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-import random
-from typing import List
-import time
-
-async def interface_download_url(url: str, retry_number = 0):
-    curr = time.time()
-    success = False
-    try:
-        res = await func_download_url(url)
-        success = True
-    except:
-        res = await interface_download_url(url, retry_number + 1)
-        res = None
-    long = curr - time.time()
-    return success, res, url, long
-
-async def func_download_url(url: str):
-    await asyncio.sleep(random.randint(1, 10))
-
-ALLOWED_BATCH_SIZE = 10
-ALLOWED_RETRY_SIZE = 3
-async def download_multi_urls(urls: List[str], retry_num = 0):
-    if retry_num > ALLOWED_RETRY_SIZE:
-        return
-
-    number_of_batch = len(urls) // ALLOWED_BATCH_SIZE + 1
-
-    errors = []
-    for batch_num in range(number_of_batch):
-        download_urls = urls[batch_num * ALLOWED_BATCH_SIZE:(batch_num + 1) * ALLOWED_BATCH_SIZE]
-        for download_url in download_urls:
-            asyncio.run(asyncio.create_task(interface_download_url(download_url)))
-        results = await asyncio.gather(*tasks)
-        for task in tasks
-            asyncio.run()
-        for result in results:
-            if not result[0]:
-                errors.append(result[2])
-
-    await download_multi_urls(errors, retry_num+1)
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/binary_search.py
--- a/grok_interview/binary_search.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-
-x = [1,2,3,4,5,9,20,25,33,99]
-#        |          |
-
-target = 18
-left = 0
-right = len(x)
-
-while left < right:
-    mid = (left + right)//2
-    if x[mid] == target:
-        break
-    elif x[mid] < target:
-        left = mid + 1
-    else:
-        right = mid
-
-print(x[mid])
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/bucket_try_2.py
--- a/grok_interview/bucket_try_2.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-# Context
-#
-# Please implement two functions: refillTokenBucket and useTokens. Two classes are already defined: DistributedCache and TokenBucket.
-#
-# refillTokenBucket(user_id): Refill tokens for the specified user's bucket.
-#
-# useTokens(user_id, tokens): Check if there are enough tokens in the specified user's token bucket. If so, update the remaining token count and return true; otherwise, return false. You are required to use instances of the existing classes to implement these functions.
-
-import asyncio
-from typing import Dict
-
-
-class DistributedCache:
-
-    def __init__(self):
-        self._hashmap: Dict[str, TokenBucket] = {}
-
-    def add_user(self, user_id: str):
-        self._hashmap[user_id] = TokenBucket()
-
-    def get_token_bucket(self, user_id: str):
-        return self._hashmap[user_id]
-
-class TokenBucket:
-
-    def __init__(self, initial_token_value: int = 100):
-        self._tokens = initial_token_value
-        self.initial_token_value = initial_token_value
-        self._lock = asyncio.Lock()
-
-    async def consume(self, token: int):
-        async with self._lock:
-            tokens = self.get_token()
-            await asyncio.sleep(0.1)
-            if tokens < token:
-                return False
-            tokens -= token
-            self.set_token(tokens)
-            return True
-
-    async def refill(self):
-        async with self._lock:
-            self._tokens = self.initial_token_value
-
-    def get_token(self):
-        return self._tokens
-
-    def set_token(self, token: int):
-        self._tokens = token
-
-distCache = DistributedCache()
-distCache.add_user("june")
-
-async def refillTokenBucket(user_id: str):
-    await distCache.get_token_bucket(user_id).refill()
-
-async def useTokens(user_id: str, token: int):
-    return await distCache.get_token_bucket(user_id).consume(token)
-
-async def main():
-    await asyncio.gather(useTokens("june", 10), useTokens("june", 10), useTokens("june", 10))
-    print(distCache.get_token_bucket("june").get_token())
-
-asyncio.run(main())
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/choice.py
--- a/grok_interview/choice.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-import random
-
-print(random.choice([
-    'Bucket questions',
-    'Inference Questions',
-    'Bank Account',
-    'Job Schedular',
-    'Alphabet Word Search II',
-    'Least Recently Used (LRU) Cache Implementation',
-    'In-Memory Key-Value Store with Nested Transactions',
-]))
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/consistent_hash.py
--- a/grok_interview/consistent_hash.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-# Write a function taht maps
-# Key string to one of N servers
-from bisect import bisect_left
-from hashlib import sha256
-
-class Server:
-
-    def __init__(self):
-        self.id = str(sha256())
-
-class ConsistentHash:
-    def __init__(self):
-        self.server = []
-        self.hash_key_to_server = {}
-        self.hash_range = 10000
-
-    def get_hash_pos(self, key: str) -> int:
-        x = sha256(bytes(key, "utf-8"))
-        return int.from_bytes(x.digest()[:8]) % self.hash_range
-
-    def add_server(self, server: Server):
-        self.server.append(server)
-        for i in range(len(self.server) + 1):
-            hash_key = self.get_hash_pos(server.id)
-            self.hash_key_to_server[hash_key] = i
-
-    def get_server(self, key):
-        hash_key = self.get_hash_pos(key)
-        return self.server[bisect_left(list(self.hash_key_to_server.keys()), hash_key)]
-
-    def remove_server(self, server):
-        hash_key = self.get_hash_pos(server.id)
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/inference.py
--- a/grok_interview/inference.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,146 +0,0 @@
-import asyncio
-import time
-from typing import List, Dict, Any, Optional
-import uuid
-from dataclasses import dataclass, field
-
-@dataclass
-class InferenceRequest:
-    prompt: str
-    request_id: str = field(default_factory=lambda: str(uuid.uuid4()))
-    max_tokens: int = 128
-    created_at: float = field(default_factory=time.time)
-
-@dataclass
-class TokenOutput:
-    token: str
-    request_id: str
-    token_index: int  # position in this request's generation
-    is_complete: bool = False
-
-class BatchInferenceEngine:
-
-    def __init__(self, batch_size: int = 8, timeout: float = 0.1):
-        """
-        Args:
-            batch_size: Maximum number of requests to process in one inference call
-            timeout: Max time to wait for batch to fill before processing (in seconds)
-        """
-        self.batch_size = batch_size
-        self.timeout = timeout
-
-        self.queue: List[InferenceRequest] = []
-        self.pending_requests: Dict[str, List[str]] = {}  # request_id -> list of generated tokens
-        self.completion_futures: Dict[str, asyncio.Future] = {}
-
-        self._lock = asyncio.Lock()
-        self._batch_ready_event = asyncio.Event()
-
-    async def submit_request(self, prompt: str, max_tokens: int = 128) -> asyncio.Future[str]:
-        """
-        Submit a new inference request and return a Future that resolves to the generated text.
-        """
-        request = InferenceRequest(prompt=prompt, max_tokens=max_tokens)
-
-        future = asyncio.get_event_loop().create_future()
-
-        async with self._lock:
-            self.queue.append(request)
-            self.pending_requests[request.request_id] = []
-            self.completion_futures[request.request_id] = future
-
-            # Trigger batch processing if we've reached batch size
-            if len(self.queue) >= self.batch_size:
-                self._batch_ready_event.set()
-
-        # Start background worker if not already running
-        asyncio.create_task(self._worker())
-
-        return future
-
-    async def _worker(self):
-        """Background task that processes batches when ready"""
-        while True:
-            # Wait until we have requests or timeout triggers
-            try:
-                await asyncio.wait_for(self._batch_ready_event.wait(), timeout=self.timeout)
-            except asyncio.TimeoutError:
-                pass
-
-            async with self._lock:
-                if not self.queue:
-                    self._batch_ready_event.clear()
-                    continue
-
-                # Take up to batch_size requests
-                current_batch = self.queue[:self.batch_size]
-                self.queue = self.queue[self.batch_size:]
-
-                # Reset event if queue is now empty
-                if not self.queue:
-                    self._batch_ready_event.clear()
-
-            # Simulate batched LLM inference
-            token_outputs: List[TokenOutput] = self._simulate_batched_inference(current_batch)
-
-            # Distribute tokens back to individual requests
-            async with self._lock:
-                for token_out in token_outputs:
-                    req_id = token_out.request_id
-                    self.pending_requests[req_id].append(token_out.token)
-
-                    # Check if this request is complete
-                    if token_out.is_complete:
-                        full_text = ''.join(self.pending_requests[req_id])
-                        future = self.completion_futures.pop(req_id)
-                        future.set_result(full_text)
-                        del self.pending_requests[req_id]
-
-    def _simulate_batched_inference(self, batch: List[InferenceRequest]) -> List[TokenOutput]:
-        """
-        Simulates a real LLM forward pass on a batch.
-        In real implementation, this would call model.generate() with padded batch.
-        """
-        outputs: List[TokenOutput] = []
-
-        for req in batch:
-            prompt_len = len(req.prompt.split())  # rough estimate
-            num_tokens_to_generate = min(req.max_tokens, 50 + hash(req.request_id) % 30)
-
-            # Simulate token-by-token generation
-            token_str = f"{req.prompt}"
-            is_complete = (i == num_tokens_to_generate - 1)
-
-            outputs.append(TokenOutput(
-                token=token_str,
-                request_id=req.request_id,
-                token_index=i,
-                is_complete=is_complete
-            ))
-
-            # In streaming: could yield early here in real streaming setup
-
-        return outputs
-
-# Example usage
-async def main():
-    engine = BatchInferenceEngine(batch_size=4, timeout=0.5)
-
-    # Submit several requests
-    futures = [
-        engine.submit_request("Tell me a joke about cats"),
-        engine.submit_request("Explain quantum computing in simple terms"),
-        engine.submit_request("Write a haiku about AI"),
-        engine.submit_request("What is the meaning of life?"),
-    ]
-
-    print("Requests submitted, waiting for results...")
-
-    # Wait for all to complete
-    results = await asyncio.gather(*futures)
-
-    for i, text in enumerate(results):
-        print(f"Request {i}: {text.result()}\n\n\n")
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/inference_my_version.py
--- a/grok_interview/inference_my_version.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-"""
-Inference Questions
-
-Context
-
-You are tasked with building a simplified inference engine component responsible for handling incoming user requests for a large language model (LLM). To optimize throughput and GPU utilization, the engine must batch multiple requests together, run the inference call once per batch, and then deconstruct the results to return token-level output to the individual users.
-
-Objective
-
-Complete the provided Python class, BatchInferenceEngine by implementing the methods necessary to:
-
-Queue incoming user requests.
-
-Process a batch when the queue reaches a defined batch size.
-Simulate the token-level output from an LLM and correctly associate each generated token with its original request.
-"""
-
-import asyncio
-from dataclasses import dataclass, field
-from time import time
-from typing import Dict, List
-import uuid
-
-@dataclass
-class UserRequest:
-    prompt: str
-    id: str = field(default_factory=lambda: str(uuid.uuid4()))
-    created_at: float = field(default_factory=time)
-
-
-@dataclass
-class TokenOutput:
-    request_id: str
-    token: bytes
-
-class BatchInferenceEngine:
-
-    def __init__(self, batch_sizes: int = 8):
-        self.queue = []
-        self.request_token_map: Dict[str, str] = {}
-        self.batch_sizes = batch_sizes
-
-        self._lock = asyncio.Lock()
-        self._batch_event = asyncio.Event()
-
-    async def add_to_queue(self, request: UserRequest):
-        async with self._lock:
-            self.queue.append(request)
-
-            if len(self.queue) > self.batch_sizes:
-                self._batch_event.set()
-
-        task = asyncio.create_task(self._batch())
-        return task
-
-    async def _batch(self):
-        while True:
-            try:
-                await asyncio.wait_for(self._batch_event.wait(), timeout=0.05)
-            except:
-                raise Exception("Timed out")
-
-            async with self._lock:
-                if not self.queue:
-                    self._batch_event.clear()
-                    continue
-
-                batch = self.queue[:self.batch_sizes]
-                tokens = await self._handle_prompt_to_token(batch)
-
-            return tokens
-
-
-    async def _handle_prompt_to_token(self, batch: List[UserRequest]):
-        pass
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/job_scheduler.py
--- a/grok_interview/job_scheduler.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-# Job Schedular
-#
-# Implement a concurrent job scheduler that can schedule multiple workers to complete job tasks. The scheduler must handle asynchronous tasks and be able to dynamically increase or decrease the number of workers. It must handle task execution failures, for example, by retrying or logging. Provide examples with at least three different tasks and examples of scheduling strategies.
-
-import asyncio
-from asyncio import Task
-import random
-
-# Rename to match standard library function
-async def job(name: str, delay: int = 1):
-    # Use asyncio.sleep
-    await asyncio.sleep(delay)
-    val = random.randint(1, 10)
-    if val > 5:
-        raise Exception("Something went wrong")
-    else:
-        print(name)
-
-class JobScheduler:
-    def __init__(self):
-        self._jobs: list[Task] = []
-
-    def add_task(self, task: Task):
-        self._jobs.append(task)
-        print(f"Added job: {task.get_name()}")  # Added for debugging
-
-    async def run(self):
-        print("Starting concurrent jobs...")
-        await asyncio.gather(*self._jobs, return_exceptions=True)
-        print("All jobs completed.")
-
-async def main():
-    job_scheduler = JobScheduler()
-    # Use asyncio.create_task and provide a name
-    job_scheduler.add_task(asyncio.create_task(job("JUNE", 3), name="JUNE_JOB"))
-    job_scheduler.add_task(asyncio.create_task(job("PARK", 2), name="PARK_JOB"))
-    job_scheduler.add_task(asyncio.create_task(job("HELLO", 1), name="HELLO_JOB"))
-    await job_scheduler.run()
-
-
-if __name__ == "__main__":
-    # Run the main asynchronous entry point
-    asyncio.run(main())
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/lru_version2.py
--- a/grok_interview/lru_version2.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#
-# Question: Least Recently Used (LRU) Cache Implementation
-#
-# Design and implement a Least Recently Used (LRU) cache. The cache should be initialized with a positive integer capacity.
-#
-# Your implementation must support the following operations with $O(1)$ time complexity for both:
-#
-# get(key): Retrieve the value of the item identified by key. If the key exists, return its value. If it doesn't, return $-1$. This operation also marks the key as the most recently used.
-#
-# put(key, value): Insert or update an item. If the key already exists, update its value and mark it as the most recently used. If the key does not exist, insert the new item. When the cache reaches its capacity, it must evict the least recently used item before inserting the new one.
-#
-# You must use a Hash Map and a Doubly Linked List for the implementation.
-
-# Cache , limit 3
-
-from typing import Dict
-
-# tail.prev.next = node, node.left = tails.prev, tails.prev = Node, node.next = tail
-
-# Head <-> Tail
-# Head -> Node <-> Tail ,
-# Head <-> Node <-> Node1 <-> Tail,
-# node.prev.next = node.next, node.next.prev = node.prev
-
-class Node:
-
-    def __init__(self, key: str, value: str):
-        self.next: Node | None = None
-        self.prev: Node | None = None
-        self.key = key
-        self.value = value
-
-class LRU:
-
-    def __init__(self, capacity: int = 2):
-        self.db: Dict[str, Node] = {}
-        self.capacity = capacity
-
-        self.head = Node("-1", "-1")
-        self.tail = Node("-1", "-1")
-        self.head.next, self.tail.prev = self.tail, self.head
-
-    def get(self, key: str):
-        node = self.db[key]
-        self._remove(node)
-        self._add_to_tail(node)
-        return node.value
-
-    def put(self, key: str, value: str):
-        if len(self.db) < self.capacity or key in self.db:
-            new_node = Node(key, value)
-            self._add_to_tail(new_node)
-            self.db[key] = new_node
-        else:
-            least_recently_used_node = self.head.next
-            self._remove(least_recently_used_node)
-            del self.db[least_recently_used_node.key]
-
-            new_node = Node(key, value)
-            self._add_to_tail(new_node)
-            self.db[key] = new_node
-
-    def _remove(self, node: Node):
-        node.prev.next, node.next.prev = node.next, node.prev
-
-    def _add_to_tail(self, node):
-        self.tail.prev.next, node.prev, self.tail.prev, node.next = node, self.tail.prev, node, self.tail
-
-lru = LRU()
-lru.put("june", "park")
-lru.put("victor", "sun")
-lru.get("june")
-lru.put("bowen", "sun")
-
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/main.py
--- a/grok_interview/main.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-# You are required to implement a production-level code live, using concurrency. Complete within 60 minutes. Suppose you are developing a simple concurrent counter that can correctly update the count in a multi-threaded environment. The initial count value is 0. Ensure it executes in 100 threads, where each thread increments the counter by 1 every second for 10 seconds. Calculate the final count after parallel execution.
-#
-# Input: None
-#
-# Output: An integer representing the final count.
-#
-# Constraints:
-#
-# The code must use multi-threading or multi-processing techniques.
-# Ensure thread safety.
-
-
-# - 100 threads
-# - each tread update counter by 1 everyseconds by 10 seconds
-from threading import Lock
-from time import sleep
-from concurrent.futures import ThreadPoolExecutor
-
-
-SLEEP_IN_SECONDS = 1
-EXECUTION_TIME_IN_SECONDS = 3
-NUMS_THREADS = 100
-
-class Counter:
-
-    def __init__(self):
-        self._value = 0
-        self.lock = Lock()
-
-    def get(self):
-        return self._value
-
-    def increment(self):
-        with self.lock:
-            value = self.get()
-            sleep(0.01)
-            self._value = value + 1
-
-def worker(counter: Counter):
-    for _ in range(EXECUTION_TIME_IN_SECONDS):
-        counter.increment()
-        sleep(SLEEP_IN_SECONDS)
-
-def main():
-    counter = Counter()
-    with ThreadPoolExecutor(max_workers=NUMS_THREADS) as executor:
-        for _ in range(NUMS_THREADS):
-            executor.submit(worker, counter)
-    print(counter.get())
-
-main()
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/merge_sort.py
--- a/grok_interview/merge_sort.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-from concurrent.futures import ProcessPoolExecutor
-import random
-from typing import List
-
-def merge_sort(lst: List[int]) -> List[int]:  # no executor parameter!
-    if len(lst) <= 1:
-        return lst
-    middle = len(lst) // 2
-    # These run in separate processes → no shared thread pool problem
-    left = merge_sort(lst[:middle])
-    right = merge_sort(lst[middle:])
-    return merge(left, right)
-
-def merge(left: List[int], right: List[int]) -> List[int]:
-    res = []
-    i = j = 0
-    while i < len(left) and j < len(right):
-        if left[i] <= right[j]:
-            res.append(left[i])
-            i += 1
-        else:
-            res.append(right[j])
-            j += 1
-    res.extend(left[i:])
-    res.extend(right[j:])
-    return res
-
-RANGE = 10_000
-cpu_num = 10
-def main():
-    data = [random.randint(0, 100) for _ in range(RANGE)]
-    sorted_data = []
-    ranges = [i for i in range(RANGE // cpu_num, RANGE, RANGE // 100)]
-    with ProcessPoolExecutor() as executor:
-        for pos_range in ranges:
-            sorted_data.append(executor.submit(merge_sort, data[:pos_range]).result())
-
-    while len(sorted_data) > 1:
-        new_sorted_data = []
-        for i in range(0, len(sorted_data), 2):
-            if
-            res = merge(sorted_data[i], sorted_data[i+1])
-            new_sorted_data.append(res)
-
-    print(len(sorted_data))
-
-if __name__ == "__main__":  # important on Windows/macOS
-    main()
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/test.py
--- a/grok_interview/test.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-import asyncio
-import random
-
-
-class database:
-    def __init__(self):
-        self.values = 0
-        self._lock = asyncio.Lock()
-
-    def get(self):
-        return self.values
-
-    def set(self, values):
-        self.values = values
-
-
-async def add(number, db: database, random_wait: int):
-    async with db._lock:
-        current = db.get()
-        await asyncio.sleep(random.random() * 0.05)
-        db.set(current + number)
-
-
-async def main():
-    db = database()
-    tasks = []
-    for i in range(100):
-        tasks.append(asyncio.create_task(
-            add(i, db, random.randint(1, 5))
-        ))
-    await asyncio.gather(*tasks)
-    print(db.get())
-    print(sum([i for i in range(100)]))
-
-asyncio.run(main())
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/thread.py
--- a/grok_interview/thread.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-# 1. data struture {}
-# - multiple threads can access it
-# - multiple threads can edit it if it is editable. (mutex)
-
-
-from hashlib import sha256
-from threading import Lock
-
-class Subscribers:
-    def __init__(self):
-        self.id = sha256()
-
-    def on_event(self, event):
-        pass
-
-class Publisher:
-    def __init__(self):
-        self._subscritions = {}
-        self.lock = Lock()
-
-    def publish_event(self, event):
-        for subscriber_id, subscriber in enumerate(self._subscritions):
-            subscriber.on_event(event)
-
-    def add_subscribers(self, subscriber: Subscribers):
-        with self.lock:
-            self._subscritions[subscriber.id] = subscriber
-
-    def remove_subscribers(self, subscriber: Subscribers):
-        with self.lock:
-            del self._subscritions[subscriber.id]
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/transaction.py
--- a/grok_interview/transaction.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-# Question: In-Memory Key-Value Store with Nested Transactions
-#
-# Design an in-memory key-value database that supports the following commands and fully handles nested transactions.
-# Core Operations:
-
-# SET : Sets the value for a key.
-
-# GET : Returns the value for a key.
-
-# UNSET : Removes the key and its value.
-#
-# Transaction Operations:
-
-# BEGIN: Starts a new transaction scope. If a transaction is already active, this starts a nested transaction.
-
-# COMMIT: Applies all changes made in the current transaction scope and all its active nested transactions to the parent scope (or to the main database state if no parent exists). After a successful commit, the transaction scope is closed.
-
-# ROLLBACK: Discards all changes made in the current transaction scope and all its active nested transactions, restoring the state to what it was when the BEGIN command was issued for the current scope. After a successful rollback, the transaction scope is closed.
-
-# Implementation Goal:
-# Design the primary data structures and outline the logic for SET, COMMIT, and ROLLBACK to ensure nested transactions operate correctly. Explain how the state of the database is managed across multiple transaction layers.
-
-
-class Database:
-    def __init__(self):
-        self.global_db = {}
-        self.queue = []
-
-    def commit(self):
-        copy_db = self.queue.pop()
-        if self.queue:
-            self.queue[-1].merge(copy_db)
-        else:
-            self.global_db |= copy_db
-
-    def set(self, key, value):
-        if self.queue:
-            curr_db = self.queue[-1]
-            curr_db[key] = value
-        else:
-            self.global_db[key] = value
-
-    def get(self, key):
-        if self.queue:
-            for copy_db in reversed(self.queue):
-                if key in copy_db:
-                    return copy_db[key]
-
-        return self.global_db[key]
-
-    def unset(self, key):
-        if self.queue:
-            curr_db = self.queue[-1]
-            del curr_db[key]
-        else:
-            del self.global_db[key]
-
diff -r 551d9fc0a2ba -r 4bc56e88e1f3 grok_interview/tries.py
--- a/grok_interview/tries.py	Thu Dec 25 20:07:46 2025 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-# Question: Alphabet Word Search II
-#
-# You are given an M x N board of characters and a list of strings called dictionary. Find all unique words in the dictionary that can be found in the board.
-#
-# A word can be constructed from letters of sequentially adjacent cells, where "adjacent" cells are those horizontally, vertically, or diagonally neighboring. The same letter cell may not be used more than once in a single word.
-#
-# Implement a function findWords(board, dictionary) that returns a list of all unique words found.
-#
-# Example:
-#
-# Board:
-#
-# [['o', 'a', 'b', 'n'],
-#  ['o', 't', 'a', 'e'],
-#  ['a', 'h', 'k', 'r'],
-#  ['a', 'f', 'l', 'v']]
-#
-
-# Dictionary: ["oath", "pea", "eat", "rain", "hobo", "oats"]
-#
-# Expected Output: ["oath", "eat", "hobo", "oats"]
-#
-# Constraints & Requirements:
-
-# The search should be optimized for large dictionaries.
-
-# Clearly explain your chosen data structure and algorithm for prefix matching and path finding.
-
-from collections import deque
-from os import wait
-from typing import List
-
-
-board = [
-    ['o', 'a', 'b', 'n'],
-    ['o', 't', 'a', 'e'],
-    ['a', 'h', 'k', 'r'],
-    ['a', 'f', 'l', 'v']
-]
-dictionary = ["oath", "pea", "eat", "rain", "hobo", "oats"]
-directions = [
-    (0, 1),
-    (0, -1),
-    (1, 0),
-    (-1, 0),
-    (1, 1),
-    (-1, -1),
-    (1, -1),
-    (-1, 1),
-]
-
-def find_words(board: List[List[str]], dictionary: List[str]):
-    tree = {}
-    for row in range(len(board)):
-        for col in range(len(board)):
-            dfs(row, col, tree)
-
-    ans = []
-    for word in dictionary:
-        curr = tree
-        possible = True
-        for letter in word:
-            if letter not in curr:
-                possible = False
-                break
-            curr = curr[letter]
-        if possible:
-            ans.append(word)
-
-    return ans
-
-
-def dfs(row, col, tree):
-    if not (row >= 0 and row < len(board)\
-            and col >= 0 and col < len(board[0])):
-        return {}
-
-    if board[row][col] == "#":
-        return {}
-
-    letter = board[row][col]
-    children = {}
-    board[row][col] = "#"
-
-    for direction in directions:
-        new_row, new_col = row + direction[0], col + direction[1]
-        child_node = dfs(new_row, new_col, tree[letter])
-        if child_node:
-            children.update(child_node)
-
-    board[row][col] = letter
-
-    return { letter: children }
-
-print(find_words(board, dictionary))