view love/pdf-mcp/pdf-mcp.py @ 175:71ad34a8bc9a hg-web

[HgWeb] Can stream hg response now. Added react page for hg web since we use json anyway.
author MrJuneJune <me@mrjunejune.com>
date Tue, 20 Jan 2026 06:06:47 -0800
parents cf9caa4abc3e
children
line wrap: on
line source

import html
import json
import logging
import os
import re
import uuid
from typing import Any, Dict, List
from urllib.parse import urlparse

import markdown2
import requests
from fastmcp import FastMCP
from xai_sdk import Client
from xai_sdk.chat import system, user
from xai_sdk.tools import web_search, code_execution

# API key for the xAI client, read from the environment. "no_apis" is only a
# placeholder used when the XAI_API_KEY variable is not set.
XAI_API_KEY = os.getenv("XAI_API_KEY", "no_apis")
# Directory under which each generated deck gets its own sub-directory;
# overridable through the DECK_OUTPUT_DIR environment variable.
OUTPUT_DIR = os.getenv("DECK_OUTPUT_DIR", "generated_decks")

# Shared xAI client, constructed once at import time and reused for every request.
xai_client = Client(api_key=XAI_API_KEY)

# The MCP server instance that the @mcp.tool() functions below register on.
# NOTE(review): host "0.0.0.0" presumably binds every network interface — confirm
# this exposure is intended for the deployment.
mcp = FastMCP("presentation-generator", port=7776, host="0.0.0.0")

# Matches direct image links in free text; used when the model reply is not valid JSON.
_IMAGE_URL_RE = re.compile(r'https?://[^\s<>"]+\.(?:jpg|jpeg|png|gif)(?:\?[^\s<>"]*)?')


def _parse_model_reply(content: str) -> Dict[str, Any]:
    """Normalize the model's raw reply into ``{"summary": str, "images": list}``."""
    text = (content or "").strip()

    # Models frequently wrap JSON in a ```json ... ``` fence even when told not
    # to; unwrap it so the strict parse below still succeeds.
    fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", text, flags=re.DOTALL)
    candidate = fenced.group(1) if fenced else text

    try:
        data = json.loads(candidate)
    except json.JSONDecodeError:
        data = None

    if isinstance(data, dict) and isinstance(data.get("summary"), str):
        images = data.get("images")
        if not isinstance(images, list):
            images = []
        # Keep only string entries so callers can treat every item as a URL.
        data["images"] = [url for url in images if isinstance(url, str)]
        return data

    # Fallback: treat the reply as markdown and pull image URLs out via regex.
    summary = text.split("images:")[0].strip() if "images:" in text else text
    return {"summary": summary, "images": _IMAGE_URL_RE.findall(text)[:15]}


def generate_summary_and_images(topic: str) -> Dict[str, Any]:
    """Ask Grok (with the web-search tool enabled) for slide text and image URLs.

    Args:
        topic: Subject of the presentation.

    Returns:
        A dict that always contains ``"summary"`` (markdown text in which
        ``---`` separates slides) and ``"images"`` (a list of URL strings).
        When the model does not answer with the requested JSON object, the
        raw reply is used as the summary and image URLs are extracted from it
        with a regular expression (at most 15).
    """
    chat = xai_client.chat.create(
        model="grok-4-fast",
        tools=[
            web_search(),
            code_execution(),
        ],
    )

    SYSTEM_PROMPT = """
    You are an expert presentation creator. For the given topic, use live_search to fetch the latest, reliable web sources.
    Summarize into concise, impactful bullet points perfect for slides (max 6-8 lines per slide, start with strong verbs/key facts).
    Use markdown bullets. Separate logical sections with --- for slide breaks.
    Also, search for 10-15 high-quality, royalty-free images (e.g., from Unsplash/Pexels via web search) suitable for slides—focus on visuals like diagrams/infographics.
    Respond ONLY in valid JSON: {"summary": "markdown summary here", "images": ["url1", "url2", ...]}.
    Ensure images are direct .jpg/.png links, diverse, and relevant.
    """

    chat.append(system(SYSTEM_PROMPT))
    response = chat.append(user(f"Topic: {topic}"))

    # Errors raised by the SDK call itself propagate to the caller; only the
    # shape of the reply is handled leniently.
    content = response.sample().content
    return _parse_model_reply(content)

def download_image(url: str, path: str) -> bool:
    """Download the resource at *url* and store it at *path*.

    This is a best-effort helper: network failures, HTTP error statuses,
    empty bodies and filesystem errors are all reported as ``False`` rather
    than raised.

    Args:
        url: HTTP(S) address of the image.
        path: Destination file path; missing parent directories are created.

    Returns:
        ``True`` when the file was written, ``False`` otherwise.
    """
    try:
        response = requests.get(url, timeout=10)
        # Without this check a 404/500 error page would be saved as an "image".
        response.raise_for_status()
        img_data = response.content
        if not img_data:
            return False

        parent = os.path.dirname(path)
        if parent:  # os.makedirs("") raises, so skip it for bare filenames
            os.makedirs(parent, exist_ok=True)
        with open(path, "wb") as f:
            f.write(img_data)
    except (requests.RequestException, OSError, ValueError):
        # Deliberately narrower than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return False
    return True

# Extensions accepted for locally stored images; anything else is saved as .jpg.
_ALLOWED_IMAGE_EXTENSIONS = frozenset({"jpg", "jpeg", "png", "gif", "webp"})
# A slide break is a line that consists only of three or more dashes.
_SLIDE_BREAK_RE = re.compile(r"^[ \t]*-{3,}[ \t]*$", re.MULTILINE)


def _image_extension(url: str) -> str:
    """Return a safe file extension taken from the URL path, defaulting to ``jpg``."""
    try:
        url_path = urlparse(url).path
    except ValueError:
        return "jpg"
    suffix = os.path.splitext(url_path)[1].lstrip(".").lower()
    return suffix if suffix in _ALLOWED_IMAGE_EXTENSIONS else "jpg"


def _split_slides(markdown_summary: str) -> List[str]:
    """Split the summary into non-empty slide chunks on ``---`` separators."""
    parts = _SLIDE_BREAK_RE.split(markdown_summary)
    if len(parts) == 1:
        # No separator on a line of its own: fall back to inline "---" markers.
        parts = markdown_summary.split("---")
    return [part.strip() for part in parts if part.strip()]


def create_presentation_deck(topic: str) -> Dict[str, str | int]:
    """Generate a complete presentation deck on disk.

    Fetches the summary and image URLs for *topic*, downloads up to 15 images
    into ``<OUTPUT_DIR>/<deck_id>/img``, renders one slide per summary section
    and writes the Reveal.js page to ``<OUTPUT_DIR>/<deck_id>/index.html``.

    Args:
        topic: Subject of the presentation.

    Returns:
        A dict with ``deck_id``, ``deck_path`` (absolute path of
        ``index.html``), ``markdown_summary``, ``num_slides`` (content slides,
        not counting the title slide) and ``images_count``.

    Raises:
        ValueError: If the generated data contains no usable summary text.
    """
    # Query the model first so a failed request does not leave an empty
    # deck directory behind.
    data = generate_summary_and_images(topic)
    markdown_summary = data.get("summary")
    if not isinstance(markdown_summary, str) or not markdown_summary.strip():
        raise ValueError("Model response did not contain a usable 'summary'")

    raw_images = data.get("images")
    if not isinstance(raw_images, list):
        raw_images = []
    image_urls = [url for url in raw_images if isinstance(url, str)]

    # Create unique deck directory
    deck_id = str(uuid.uuid4())[:8]
    deck_dir = os.path.join(OUTPUT_DIR, deck_id)
    os.makedirs(os.path.join(deck_dir, "img"), exist_ok=True)

    # Download images; a failed download falls back to hot-linking the remote URL.
    local_images = []
    for i, url in enumerate(image_urls[:15]):
        # Derive the extension from the URL *path* only; splitting the whole
        # URL on "." yields fragments such as "com/photo-123" for links
        # without a file extension.
        filename = f"{i}.{_image_extension(url)}"
        if download_image(url, os.path.join(deck_dir, "img", filename)):
            local_images.append(f"img/{filename}")
        else:
            local_images.append(url)

    # Build slides; once images run out, the last image is reused.
    slides = []
    for i, raw in enumerate(_split_slides(markdown_summary)):
        # The summary is derived from web content, so raw HTML is escaped
        # instead of being passed through into the published page.
        html_content = markdown2.markdown(raw, safe_mode="escape")
        if i < len(local_images):
            img = local_images[i]
        else:
            img = local_images[-1] if local_images else ""
        slides.append({"content": html_content, "image": img})

    # Generate Reveal.js HTML
    deck_path = os.path.join(deck_dir, "index.html")
    with open(deck_path, "w", encoding="utf-8") as f:
        f.write(generate_reveal_html(topic, slides))

    return {
        "deck_id": deck_id,
        "deck_path": os.path.abspath(deck_path),
        "markdown_summary": markdown_summary,
        "num_slides": len(slides),
        "images_count": len(local_images),
    }

def generate_reveal_html(topic: str, slides: List[Dict]) -> str:
    """Render a self-contained Reveal.js HTML page.

    Args:
        topic: Presentation title. It is HTML-escaped before being placed in
            the ``<title>`` element and the title slide.
        slides: One dict per slide. ``"content"`` holds ready-made HTML and is
            inserted verbatim; ``"image"`` holds an image URL/path, or an
            empty value for a slide without an image.

    Returns:
        The complete HTML document as a string.
    """
    # The topic is caller-supplied text: escape it so it cannot inject markup.
    safe_topic = html.escape(str(topic))

    sections = []
    for slide in slides:
        image_src = slide.get("image")
        if image_src:
            # Escape quotes too, otherwise a crafted URL could break out of src="...".
            safe_src = html.escape(str(image_src), quote=True)
            img_html = f'<img src="{safe_src}" style="max-width:50%;float:right;margin-left:20px;" />'
        else:
            img_html = ""
        sections.append(f"""
        <section>
            {img_html}
            <div style="text-align:left;">
                {slide["content"]}
            </div>
        </section>
        """)
    slides_html = "".join(sections)

    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{safe_topic} - Presentation</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reset.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reveal.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/theme/black.min.css">
</head>
<body>
    <div class="reveal">
        <div class="slides">
            <section>
                <h1>{safe_topic}</h1>
                <p>Generated with Grok AI</p>
            </section>
            {slides_html}
        </div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/reveal.js/4.3.1/reveal.min.js"></script>
    <script>
        Reveal.initialize({{
            hash: true,
            transition: 'slide'
        }});
    </script>
</body>
</html>"""

# FastMCP Tool Definitions
@mcp.tool()
def generate_presentation(topic: str) -> str:
    """Generate a complete PowerPoint-style presentation deck on any topic using Grok AI.

    This tool researches the topic using live web search, creates concise slide content,
    finds relevant images, and generates a Reveal.js HTML presentation that can be viewed in browser.

    Args:
        topic: The topic for the presentation (e.g., 'Dogs in Korea', 'Quantum Computing Basics', 'Climate Change').

    Returns:
        A summary of the generated presentation with file location
    """
    try:
        result = create_presentation_deck(topic)

        # Construct the public URL
        public_url = f"https://slides.babocoder.com/{result['deck_id']}/index.html"

        # Return JSON response as Grok expects with instruction
        json_response = json.dumps({
            "file_path": public_url,
            "type": "html",
            "metadata": {
                "topic": topic,
                "deck_id": result['deck_id'],
                "num_slides": result['num_slides'],
                "images_count": result['images_count']
            }
        })

        response = f"""Presentation generated successfully!

 IMPORTANT: Return this exact JSON response to the client:

 {json_response}

 DO NOT modify or explain this JSON. Send it directly to the client as-is."""

        return response
    except Exception as e:
        # The message below points the user at the server logs, so make sure
        # the failure (with traceback) is actually recorded there.
        logging.getLogger(__name__).exception(
            "Failed to generate presentation for topic %r", topic
        )
        return f"❌ Error generating presentation: {str(e)}\n\nPlease try again or check the server logs."
 
@mcp.tool()
def get_presentation_preview(topic: str) -> str:
    """Generate just the markdown summary for a presentation topic without creating the full deck.
    
    Useful for previewing content or getting a quick outline before generating slides.
    
    Args:
        topic: The topic to preview
    
    Returns:
        A markdown preview of the presentation content
    """
    try:
        payload = generate_summary_and_images(topic)
        outline = payload["summary"]
        image_total = len(payload.get('images', []))

        # Assemble the preview line by line; the final empty entry yields the
        # trailing newline of the message.
        preview_lines = [
            f"📝 Presentation Preview for: {topic}",
            "",
            f"{outline}",
            "",
            "---",
            f"**Images Found:** {image_total}",
            "",
            "To generate the full presentation deck with slides, use the 'generate_presentation' tool.",
            "",
        ]
        return "\n".join(preview_lines)
    except Exception as e:
        # Any failure is reported to the client as text rather than raised.
        return f"❌ Error generating preview: {str(e)}"

# Script entry point: runs only when the file is executed directly, not on import.
if __name__ == "__main__":
    # Make sure the root output directory exists before any deck is generated.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Start the MCP server using the "streamable-http" transport.
    mcp.run(transport="streamable-http")