Mercurial
view love/pdf-mcp/main.py @ 71:75de5903355c
Gigantic changes that update the Dowa library to be more aligned with stb-style arrays and hashmaps. Updated Seobeo to cache on the server side instead of using file-level caching. Deleted a bunch of things I don't really use.
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Sun, 28 Dec 2025 20:34:22 -0800 |
| parents | cf9caa4abc3e |
| children |
line wrap: on
line source
"""FastMCP server that turns a topic into a Reveal.js HTML presentation.

The server asks Grok (via the xAI SDK) for a markdown summary plus image URLs,
downloads the images next to the deck, and writes a self-contained
``index.html`` into ``OUTPUT_DIR/<deck_id>/``.
"""

import html
import json
import logging
import os
import re
import uuid
from typing import Any, Dict, List
from urllib.parse import urlparse

import markdown2
import requests
from fastmcp import FastMCP
from xai_sdk import Client
from xai_sdk.chat import system, user
from xai_sdk.tools import web_search, code_execution

logger = logging.getLogger(__name__)

# Configuration
XAI_API_KEY = os.getenv("XAI_API_KEY", "no_api")
OUTPUT_DIR = os.getenv("DECK_OUTPUT_DIR", "generated_decks")
# Base URL under which OUTPUT_DIR is served; the default matches the
# previously hard-coded host.
PUBLIC_BASE_URL = os.getenv("DECK_PUBLIC_BASE_URL", "https://slide.babocoder.com")

# Upper bound on images kept per deck.
MAX_IMAGES = 15
# Extensions accepted for downloaded files; anything else is stored as .jpg.
_ALLOWED_IMAGE_EXTS = frozenset({"jpg", "jpeg", "png", "gif", "webp", "svg"})
# Used only by the non-JSON fallback to scrape image links from free text.
_IMAGE_URL_RE = re.compile(
    r'https?://[^\s<>"]+\.(?:jpg|jpeg|png|gif)(?:\?[^\s<>"]*)?'
)
# Matches a reply that is entirely wrapped in a ``` / ```json code fence.
_CODE_FENCE_RE = re.compile(
    r"^```(?:json)?[ \t]*\n(.*?)\n?[ \t]*```$", re.DOTALL | re.IGNORECASE
)

SYSTEM_PROMPT = """
You are an expert presentation creator. For the given topic, use live_search to fetch the latest, reliable web sources.
Summarize into concise, impactful bullet points perfect for slides (max 6-8 lines per slide, start with strong verbs/key facts).
Use markdown bullets. Separate logical sections with --- for slide breaks.
Also, search for 10-15 high-quality, royalty-free images (e.g., from Unsplash/Pexels via web search) suitable for slides—focus on visuals like diagrams/infographics.
Respond ONLY in valid JSON: {"summary": "markdown summary here", "images": ["url1", "url2", ...]}.
Ensure images are direct .jpg/.png links, diverse, and relevant.
"""

# Initialize xAI client
xai_client = Client(api_key=XAI_API_KEY)

# Create FastMCP server
mcp = FastMCP("presentation-generator", port=7776, host="0.0.0.0")


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding markdown code fence, if any."""
    stripped = text.strip()
    fenced = _CODE_FENCE_RE.match(stripped)
    return fenced.group(1) if fenced else stripped


def _image_extension(url: str) -> str:
    """Return a safe file extension for *url*, defaulting to ``jpg``.

    Only the URL *path* is inspected, so query strings and host names can
    never leak path separators into the local file name.
    """
    try:
        suffix = os.path.splitext(urlparse(url).path)[1]
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6).
        return "jpg"
    ext = suffix.lstrip(".").lower()
    return ext if ext in _ALLOWED_IMAGE_EXTS else "jpg"


def generate_summary_and_images(topic: str) -> Dict[str, Any]:
    """Use Grok API with live_search to summarize + extract image URLs.

    Args:
        topic: Subject of the presentation.

    Returns:
        A dict with ``"summary"`` (markdown string) and ``"images"`` (list of
        URL strings). Both keys are always present.
    """
    chat = xai_client.chat.create(
        model="grok-4-fast",
        tools=[
            web_search(),
            code_execution(),
        ],
    )
    chat.append(system(SYSTEM_PROMPT))
    response = chat.append(user(f"Topic: {topic}"))
    content = response.sample().content or ""

    try:
        data = json.loads(_strip_code_fence(content))
    except json.JSONDecodeError:
        data = None

    if isinstance(data, dict) and isinstance(data.get("summary"), str):
        raw_images = data.get("images")
        images = (
            [u for u in raw_images if isinstance(u, str) and u]
            if isinstance(raw_images, list)
            else []
        )
        # Preserve any extra keys the model returned, but guarantee the shape
        # of the two keys callers rely on.
        return {**data, "summary": data["summary"], "images": images}

    # Fallback: treat the reply as markdown and extract URLs via regex.
    # Shouldn't really get here...
    summary = content.split("images:")[0].strip() if "images:" in content else content
    images = _IMAGE_URL_RE.findall(content)
    return {"summary": summary, "images": images[:MAX_IMAGES]}


def download_image(url: str, path: str) -> bool:
    """Download image to local path.

    Args:
        url: Remote image URL.
        path: Destination file path; parent directories are created.

    Returns:
        True when the image was fetched with a successful HTTP status and
        written to *path*, False otherwise (the failure is logged).
    """
    try:
        reply = requests.get(url, timeout=10)
        # Without this an HTML error page would be saved as the "image".
        reply.raise_for_status()
        parent = os.path.dirname(path)
        if parent:  # os.makedirs("") raises, so skip for bare file names
            os.makedirs(parent, exist_ok=True)
        with open(path, "wb") as f:
            f.write(reply.content)
    except (requests.RequestException, OSError, ValueError) as exc:
        logger.warning("Could not download image %s: %s", url, exc)
        return False
    return True


def create_presentation_deck(topic: str) -> Dict[str, str | int]:
    """Generate a complete presentation deck.

    Args:
        topic: Subject of the presentation.

    Returns:
        A dict with ``deck_id``, ``deck_path`` (absolute path of the written
        ``index.html``), ``markdown_summary``, ``num_slides`` and
        ``images_count``.
    """
    # Create unique deck directory
    deck_id = str(uuid.uuid4())[:8]
    deck_dir = os.path.join(OUTPUT_DIR, deck_id)
    os.makedirs(os.path.join(deck_dir, "img"), exist_ok=True)

    # Generate summary and images using Grok
    data = generate_summary_and_images(topic)
    markdown_summary = data["summary"]
    image_urls = data.get("images", [])

    # Download images; keep the remote URL when the download fails so the
    # slide can still hot-link it.
    local_images = []
    for i, url in enumerate(image_urls[:MAX_IMAGES]):
        file_name = f"{i}.{_image_extension(url)}"
        if download_image(url, os.path.join(deck_dir, "img", file_name)):
            local_images.append(f"img/{file_name}")
        else:
            local_images.append(url)

    # Split summary into slides, ignoring blank sections (e.g. a trailing ---)
    sections = [part.strip() for part in markdown_summary.split("---")]
    slides = []
    for i, section in enumerate(s for s in sections if s):
        if i < len(local_images):
            img = local_images[i]
        else:
            # More slides than images: reuse the last image, if there is one.
            img = local_images[-1] if local_images else ""
        slides.append({"content": markdown2.markdown(section), "image": img})

    # Generate Reveal.js HTML
    deck_path = os.path.join(deck_dir, "index.html")
    with open(deck_path, "w", encoding="utf-8") as f:
        f.write(generate_reveal_html(topic, slides))

    return {
        "deck_id": deck_id,
        "deck_path": os.path.abspath(deck_path),
        "markdown_summary": markdown_summary,
        "num_slides": len(slides),
        "images_count": len(local_images),
    }


def generate_reveal_html(topic: str, slides: List[Dict]) -> str:
    """Generate Reveal.js HTML presentation.

    Args:
        topic: Presentation title; HTML-escaped before being embedded.
        slides: Dicts with ``"content"`` (already-rendered HTML, embedded
            as-is) and ``"image"`` (URL or relative path, may be empty).

    Returns:
        The full HTML document as a string.
    """
    safe_topic = html.escape(topic)

    sections = []
    for slide in slides:
        if slide["image"]:
            src = html.escape(slide["image"], quote=True)
            img_html = (
                f'<img src="{src}" '
                'style="max-width:50%;float:right;margin-left:20px;" />'
            )
        else:
            img_html = ""
        sections.append(f"""
        <section>
            {img_html}
            <div style="text-align:left;">
                {slide["content"]}
            </div>
        </section>
        """)
    slides_html = "".join(sections)

    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{safe_topic} - Presentation</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reset.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reveal.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/theme/black.min.css">
</head>
<body>
    <div class="reveal">
        <div class="slides">
            <section>
                <h1>{safe_topic}</h1>
                <p>Generated with Grok AI</p>
            </section>
            {slides_html}
        </div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reveal.min.js"></script>
    <script>
        Reveal.initialize({{
            hash: true,
            transition: 'slide'
        }});
    </script>
</body>
</html>"""


# FastMCP Tool Definitions
@mcp.tool()
def generate_presentation(topic: str) -> str:
    """Generate a complete PowerPoint-style presentation deck on any topic using Grok AI.

    This tool researches the topic using live web search, creates concise slide content,
    finds relevant images, and generates a Reveal.js HTML presentation that can be viewed in browser.

    Args:
        topic: The topic for the presentation (e.g., 'Dogs in Korea',
            'Quantum Computing Basics', 'Climate Change')

    Returns:
        A summary of the generated presentation with file location
    """
    try:
        result = create_presentation_deck(topic)

        # Construct the public URL
        public_url = f"{PUBLIC_BASE_URL.rstrip('/')}/{result['deck_id']}/index.html"

        # Return JSON response as Grok expects with instruction
        json_response = json.dumps({
            "file_path": public_url,
            "type": "html",
            "metadata": {
                "topic": topic,
                "deck_id": result['deck_id'],
                "num_slides": result['num_slides'],
                "images_count": result['images_count'],
            },
        })

        response = f"""Presentation generated successfully!

IMPORTANT: Return this exact JSON response to the client:

{json_response}

DO NOT modify or explain this JSON. Send it directly to the client as-is."""
        return response
    except Exception as e:
        # Tool boundary: report the failure to the client, and actually write
        # the traceback to the logs the message points at.
        logger.exception("Failed to generate presentation for topic %r", topic)
        return f"❌ Error generating presentation: {str(e)}\n\nPlease try again or check the server logs."


@mcp.tool()
def get_presentation_preview(topic: str) -> str:
    """Generate just the markdown summary for a presentation topic without creating the full deck.

    Useful for previewing content or getting a quick outline before generating slides.

    Args:
        topic: The topic to preview

    Returns:
        A markdown preview of the presentation content
    """
    try:
        data = generate_summary_and_images(topic)
        markdown_summary = data["summary"]

        response = f"""📝 Presentation Preview for: {topic}

{markdown_summary}

---
**Images Found:** {len(data.get('images', []))}

To generate the full presentation deck with slides, use the 'generate_presentation' tool.
"""
        return response
    except Exception as e:
        logger.exception("Failed to generate preview for topic %r", topic)
        return f"❌ Error generating preview: {str(e)}"


if __name__ == "__main__":
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    mcp.run(transport="streamable-http")