diff markdown_converter/wasm/markdown_to_html_wasm.c @ 173:827c6ac504cd hg-web

Merged in default here.
author MrJuneJune <me@mrjunejune.com>
date Mon, 19 Jan 2026 18:59:10 -0800
parents cd35e600ae34
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/markdown_converter/wasm/markdown_to_html_wasm.c	Mon Jan 19 18:59:10 2026 -0800
@@ -0,0 +1,698 @@
+/**
+ * Markdown to HTML Converter - Standalone WASM Implementation
+ * No libc dependencies - can be compiled with: clang --target=wasm32
+ */
+
+// Give a symbol default visibility so it is not hidden from the module's
+// consumers (used on every function meant to be called from outside).
+#define WASM_EXPORT __attribute__((visibility("default")))
+
+// Local definitions of the basic integer types, since no libc headers are
+// included in this file.
+typedef unsigned long size_t;
+typedef int int32_t;
+
+// Simple bump allocator for WASM.
+// Every allocation is carved sequentially out of this fixed static arena.
+#define HEAP_SIZE (1024 * 1024)  // 1MB heap
+// malloc() rounds only the *offset* up to a multiple of 8, so the arena base
+// itself has to be 8-byte aligned for the returned pointers to be aligned.
+static char heap[HEAP_SIZE] __attribute__((aligned(8)));
+// Offset of the first unused byte in `heap`.
+static size_t heap_offset = 0;
+
+// Allocate `size` bytes from the bump heap.
+// The returned pointer sits at an offset into the heap that is a multiple
+// of 8. Returns 0 (null) when the request does not fit in the space that
+// is left.
+WASM_EXPORT void *malloc(size_t size)
+{
+  // Round the current offset up to the next multiple of 8.
+  size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;
+
+  // Compare against the remaining space rather than computing
+  // aligned_offset + size: that sum wraps around for very large sizes and
+  // would then slip past the bounds check.
+  if (aligned_offset > HEAP_SIZE) return 0;
+  if (size > HEAP_SIZE - aligned_offset) return 0;
+
+  void *ptr = &heap[aligned_offset];
+  heap_offset = aligned_offset + size;
+  return ptr;
+}
+
+// Release an allocation. Intentionally does nothing: the bump allocator has
+// no way to reclaim an individual block. Memory is only recovered in bulk
+// by heap_reset().
+WASM_EXPORT void free(void *ptr)
+{
+  // Simple bump allocator - no actual free
+  (void)ptr;
+}
+
+// Rewind the allocator to the start of the heap.
+// Later malloc() calls hand out the same memory again, so every pointer
+// obtained before this call (including a string returned by
+// markdown_to_html()) may be overwritten afterwards.
+WASM_EXPORT void heap_reset(void)
+{
+  heap_offset = 0;
+}
+
+// String functions
+
+// Count the characters that precede the terminating NUL of `s`.
+static size_t strlen(const char *s)
+{
+  const char *end = s;
+  while (*end != '\0') {
+    end++;
+  }
+  return (size_t)(end - s);
+}
+
+// Copy `n` bytes from `src` to `dest`, front to back, and return `dest`.
+static void *memcpy(void *dest, const void *src, size_t n)
+{
+  char *out = (char *)dest;
+  const char *in = (const char *)src;
+  for (size_t i = 0; i < n; i++) {
+    out[i] = in[i];
+  }
+  return dest;
+}
+
+// Return 1 for the six ASCII whitespace characters (space, tab, newline,
+// carriage return, form feed, vertical tab) and 0 for anything else.
+static int isspace_c(int c)
+{
+  switch (c) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+    case '\f':
+    case '\v':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+// Return 1 when `c` is one of the ASCII digits '0'..'9', otherwise 0.
+static int isdigit_c(int c)
+{
+  if (c < '0') {
+    return 0;
+  }
+  return c <= '9';
+}
+
+// String buffer for building HTML output
+typedef struct {
+  char  *data;      // NUL-terminated contents
+  size_t length;    // characters stored in data, not counting the NUL
+  size_t capacity;  // bytes allocated for data
+} StringBuffer;
+
+// Allocate an empty StringBuffer with room for `initial_capacity` bytes.
+// Returns 0 (null) if either allocation fails.
+static StringBuffer *buffer_create(size_t initial_capacity)
+{
+  // At least one byte is needed for the terminating NUL. With a capacity
+  // of 0 the write below would land outside the allocation, and a zero
+  // capacity can never be enlarged by doubling.
+  if (initial_capacity == 0) initial_capacity = 1;
+
+  StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
+  if (!buf) return 0;
+
+  buf->data = (char *)malloc(initial_capacity);
+  if (!buf->data) {
+    free(buf);  // hand the header back instead of dropping it
+    return 0;
+  }
+
+  buf->data[0] = '\0';
+  buf->length = 0;
+  buf->capacity = initial_capacity;
+  return buf;
+}
+
+// Make sure `buf` can take `needed` more characters plus the terminating
+// NUL. When it is too small, the contents are copied into a larger
+// allocation whose size is the capacity doubled until it fits.
+// If the required size is not representable or the allocation fails, `buf`
+// is left exactly as it was; nothing is reported to the caller.
+static void buffer_grow(StringBuffer *buf, size_t needed)
+{
+  // Reject requests where length + needed + 1 would wrap around.
+  size_t headroom = (size_t)-1 - buf->length;
+  if (headroom == 0 || needed > headroom - 1) return;
+
+  size_t required = buf->length + needed + 1;
+  if (required <= buf->capacity) return;
+
+  // Start from at least 1 so that doubling makes progress even when the
+  // current capacity is 0.
+  size_t new_capacity = buf->capacity ? buf->capacity : 1;
+  while (new_capacity < required) {
+    if (new_capacity > (size_t)-1 / 2) {
+      // Doubling again would wrap; settle for exactly what is required.
+      new_capacity = required;
+      break;
+    }
+    new_capacity *= 2;
+  }
+
+  char *new_data = (char *)malloc(new_capacity);
+  if (!new_data) return;
+
+  memcpy(new_data, buf->data, buf->length + 1);  // +1 carries the NUL over
+  free(buf->data);
+  buf->data = new_data;
+  buf->capacity = new_capacity;
+}
+
+// Append the NUL-terminated string `str` to `buf`.
+// If the buffer cannot be made large enough, the append is dropped and
+// `buf` is left unchanged.
+static void buffer_append(StringBuffer *buf, const char *str)
+{
+  if (!str) return;
+
+  size_t len = strlen(str);
+  buffer_grow(buf, len);
+
+  // buffer_grow() returns nothing and leaves the buffer untouched when it
+  // cannot allocate, so confirm that len characters plus the NUL really
+  // fit before writing; otherwise the copy would run past the allocation.
+  if (buf->capacity <= buf->length) return;
+  if (len >= buf->capacity - buf->length) return;
+
+  memcpy(buf->data + buf->length, str, len + 1);
+  buf->length += len;
+}
+
+// Append exactly `n` characters starting at `str` to `buf` and
+// re-terminate the buffer. `str` does not need to be NUL-terminated.
+// If the buffer cannot be made large enough, the append is dropped and
+// `buf` is left unchanged.
+static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
+{
+  if (!str && n > 0) return;
+
+  buffer_grow(buf, n);
+
+  // buffer_grow() returns nothing and leaves the buffer untouched when it
+  // cannot allocate, so confirm that n characters plus the NUL really fit
+  // before writing; otherwise the copy would run past the allocation.
+  if (buf->capacity <= buf->length) return;
+  if (n >= buf->capacity - buf->length) return;
+
+  memcpy(buf->data + buf->length, str, n);
+  buf->length += n;
+  buf->data[buf->length] = '\0';
+}
+
+// Append the single character `c` to `buf` and re-terminate the buffer.
+// If the buffer cannot be made large enough, the character is dropped and
+// `buf` is left unchanged.
+static void buffer_append_char(StringBuffer *buf, char c)
+{
+  buffer_grow(buf, 1);
+
+  // buffer_grow() returns nothing and leaves the buffer untouched when it
+  // cannot allocate, so confirm there is room for the character and the
+  // NUL before writing.
+  if (buf->capacity <= buf->length) return;
+  if (buf->capacity - buf->length < 2) return;
+
+  buf->data[buf->length++] = c;
+  buf->data[buf->length] = '\0';
+}
+
+// Return 1 if `line`, ignoring its leading whitespace, begins with
+// `pattern`; otherwise return 0.
+static int starts_with(const char *line, const char *pattern)
+{
+  const char *cursor = line;
+  while (*cursor != '\0' && isspace_c(*cursor)) {
+    cursor++;
+  }
+
+  const size_t pattern_len = strlen(pattern);
+  size_t matched = 0;
+  // A NUL in the line never equals a pattern character, so the comparison
+  // stops before it can read past the end of a short line.
+  while (matched < pattern_len && cursor[matched] == pattern[matched]) {
+    matched++;
+  }
+  return matched == pattern_len;
+}
+
+// Return the heading level (1-6) when the line, after leading whitespace,
+// is a run of '#' followed directly by a space; otherwise return 0.
+// A run of seven or more '#' is not a heading.
+static int count_heading_level(const char *line)
+{
+  const char *p = line;
+  while (*p != '\0' && isspace_c(*p)) {
+    p++;
+  }
+
+  int hashes = 0;
+  for (; hashes < 6; hashes++) {
+    if (p[hashes] != '#') break;
+  }
+
+  if (hashes == 0) {
+    return 0;
+  }
+  return p[hashes] == ' ' ? hashes : 0;
+}
+
+// Return a pointer to the first non-whitespace character of `str`, or to
+// its terminating NUL when the string is all whitespace.
+static const char *skip_whitespace(const char *str)
+{
+  for (;;) {
+    const char c = *str;
+    if (c == '\0' || !isspace_c(c)) {
+      return str;
+    }
+    str++;
+  }
+}
+
+// Return 1 when `line` has no characters other than whitespace (an empty
+// string counts), otherwise 0.
+static int is_empty_line(const char *line)
+{
+  for (const char *p = line; *p != '\0'; p++) {
+    if (!isspace_c(*p)) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+// Return 1 when the line is a horizontal rule: three or more of the same
+// marker ('-', '*' or '_'), with nothing but whitespace between or around
+// them. Otherwise return 0.
+static int is_horizontal_rule(const char *line)
+{
+  const char *p = skip_whitespace(line);
+  const char marker = *p;
+
+  switch (marker) {
+    case '-':
+    case '*':
+    case '_':
+      break;
+    default:
+      return 0;
+  }
+
+  int marker_count = 0;
+  for (; *p != '\0'; p++) {
+    if (*p == marker) {
+      marker_count++;
+      continue;
+    }
+    if (!isspace_c(*p)) {
+      return 0;
+    }
+  }
+  return marker_count >= 3;
+}
+
+// Return 1 when the line, after leading whitespace, begins with a bullet
+// marker ('-', '*' or '+') followed directly by a space; otherwise 0.
+static int is_unordered_list(const char *line)
+{
+  const char *p = skip_whitespace(line);
+  switch (*p) {
+    case '-':
+    case '*':
+    case '+':
+      return p[1] == ' ';
+    default:
+      return 0;
+  }
+}
+
+// Return 1 when the line, after leading whitespace, begins with one or
+// more digits followed by ". " (for example "12. item"); otherwise 0.
+static int is_ordered_list(const char *line)
+{
+  const char *digits = skip_whitespace(line);
+  const char *p = digits;
+  while (isdigit_c(*p)) p++;
+
+  // A list number is mandatory: without this check a line such as
+  // ". foo" would be treated as an ordered-list item.
+  if (p == digits) return 0;
+
+  return *p == '.' && p[1] == ' ';
+}
+
+// Return 1 when the first non-whitespace character of the line is '|',
+// otherwise 0.
+static int is_table_row(const char *line)
+{
+  const char *first = skip_whitespace(line);
+  return first[0] == '|';
+}
+
+// Return 1 when the line is a table separator row such as "| --- | :-: |":
+// it starts with '|' (after leading whitespace), the rest consists only of
+// '-', '|', ':' and whitespace, and at least one '-' is present.
+static int is_table_separator(const char *line)
+{
+  const char *p = skip_whitespace(line);
+  if (*p != '|') {
+    return 0;
+  }
+
+  int saw_dash = 0;
+  for (p++; *p != '\0'; p++) {
+    const char c = *p;
+    if (c == '-') {
+      saw_dash = 1;
+    } else if (c != '|' && c != ':' && !isspace_c(c)) {
+      return 0;
+    }
+  }
+  return saw_dash;
+}
+
+// Forward declaration for process_inline
+static void process_inline(StringBuffer *buf, const char *text, size_t len);
+
+// Emit one table row as "<tr>...</tr>". The line is split on '|'; each
+// cell is trimmed, run through process_inline() and wrapped in <th> (when
+// is_header is non-zero) or <td>. One leading '|' is skipped, and an empty
+// trailing segment that is not followed by another '|' produces no cell.
+static void parse_table_row(StringBuffer *buf, const char *line, int is_header)
+{
+  const char *open_tag = is_header ? "<th>" : "<td>";
+  const char *close_tag = is_header ? "</th>" : "</td>";
+
+  buffer_append(buf, "<tr>");
+
+  const char *p = skip_whitespace(line);
+  if (*p == '|') {
+    p++;  // leading pipe
+  }
+
+  while (*p != '\0') {
+    // Cell text runs from the first non-blank character up to the next
+    // pipe (or the end of the line), minus trailing blanks.
+    p = skip_whitespace(p);
+    const char *cell_begin = p;
+    while (*p != '\0' && *p != '|') {
+      p++;
+    }
+    const char *cell_finish = p;
+    while (cell_finish > cell_begin && isspace_c(cell_finish[-1])) {
+      cell_finish--;
+    }
+
+    const size_t cell_len = (size_t)(cell_finish - cell_begin);
+    const int at_pipe = (*p == '|');
+
+    // An empty cell is still emitted when a pipe closes it.
+    if (cell_len > 0 || at_pipe) {
+      buffer_append(buf, open_tag);
+      if (cell_len > 0) {
+        process_inline(buf, cell_begin, cell_len);
+      }
+      buffer_append(buf, close_tag);
+    }
+
+    if (at_pipe) {
+      p++;
+    }
+
+    // Only whitespace left: the pipe just consumed was the trailing one.
+    if (*skip_whitespace(p) == '\0') {
+      break;
+    }
+  }
+
+  buffer_append(buf, "</tr>");
+}
+
+// Append `n` characters of `text` with HTML-significant characters turned
+// into entities. '"' is escaped as well so the result can be placed inside
+// a double-quoted attribute value.
+static void append_html_escaped(StringBuffer *buf, const char *text, size_t n)
+{
+  for (size_t k = 0; k < n; k++) {
+    switch (text[k]) {
+      case '<':  buffer_append(buf, "&lt;");   break;
+      case '>':  buffer_append(buf, "&gt;");   break;
+      case '&':  buffer_append(buf, "&amp;");  break;
+      case '"':  buffer_append(buf, "&quot;"); break;
+      default:   buffer_append_char(buf, text[k]); break;
+    }
+  }
+}
+
+// Process inline markdown.
+// Converts the `len` characters at `text` (which need not be
+// NUL-terminated) to HTML and appends the result to `buf`. Recognised, in
+// this order at each position: links, images, bold, strikethrough, italic
+// and inline code. A construct whose closing delimiter is missing is
+// treated as plain text. Bold, strikethrough and italic contents are
+// processed recursively; link text, URLs, alt text and inline code are
+// emitted verbatim apart from HTML escaping.
+static void process_inline(StringBuffer *buf, const char *text, size_t len)
+{
+  size_t i = 0;
+
+  while (i < len) {
+    // Links: [text](url)
+    if (text[i] == '[') {
+      size_t link_start = i + 1;
+      size_t link_end = link_start;
+      while (link_end < len && text[link_end] != ']') link_end++;
+
+      if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
+        size_t url_start = link_end + 2;
+        size_t url_end = url_start;
+        while (url_end < len && text[url_end] != ')') url_end++;
+
+        if (url_end < len) {
+          // The URL and the link text come straight from the input, so
+          // they are escaped; otherwise a '"' could close the attribute
+          // and inject markup.
+          // NOTE(review): the URL scheme is not checked, so
+          // "javascript:" URLs still pass through unchanged.
+          buffer_append(buf, "<a href=\"");
+          append_html_escaped(buf, text + url_start, url_end - url_start);
+          buffer_append(buf, "\">");
+          append_html_escaped(buf, text + link_start, link_end - link_start);
+          buffer_append(buf, "</a>");
+          i = url_end + 1;
+          continue;
+        }
+      }
+    }
+
+    // Images: ![alt](url)
+    if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
+      size_t alt_start = i + 2;
+      size_t alt_end = alt_start;
+      while (alt_end < len && text[alt_end] != ']') alt_end++;
+
+      if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
+        size_t url_start = alt_end + 2;
+        size_t url_end = url_start;
+        while (url_end < len && text[url_end] != ')') url_end++;
+
+        if (url_end < len) {
+          // Both values end up inside quoted attributes: escape them.
+          buffer_append(buf, "<img src=\"");
+          append_html_escaped(buf, text + url_start, url_end - url_start);
+          buffer_append(buf, "\" alt=\"");
+          append_html_escaped(buf, text + alt_start, alt_end - alt_start);
+          buffer_append(buf, "\">");
+          i = url_end + 1;
+          continue;
+        }
+      }
+    }
+
+    // Bold: **text** or __text__
+    if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
+        (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
+      char marker = text[i];
+      size_t start = i + 2;
+      size_t end = start;
+      while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
+
+      if (end + 1 < len) {
+        buffer_append(buf, "<strong>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</strong>");
+        i = end + 2;
+        continue;
+      }
+    }
+
+    // Strikethrough: ~~text~~
+    if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
+      size_t start = i + 2;
+      size_t end = start;
+      while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
+
+      if (end + 1 < len) {
+        buffer_append(buf, "<del>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</del>");
+        i = end + 2;
+        continue;
+      }
+    }
+
+    // Italic: *text* or _text_
+    // The marker must be followed by a non-space character and the
+    // emphasised text must not be empty.
+    if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
+      char marker = text[i];
+      size_t start = i + 1;
+      size_t end = start;
+      while (end < len && text[end] != marker) end++;
+
+      if (end < len && end > start) {
+        buffer_append(buf, "<em>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</em>");
+        i = end + 1;
+        continue;
+      }
+    }
+
+    // Inline code: `code`
+    if (text[i] == '`') {
+      size_t start = i + 1;
+      size_t end = start;
+      while (end < len && text[end] != '`') end++;
+
+      if (end < len) {
+        // Code is shown literally, so it must be escaped just like the
+        // contents of a fenced code block.
+        buffer_append(buf, "<code>");
+        append_html_escaped(buf, text + start, end - start);
+        buffer_append(buf, "</code>");
+        i = end + 1;
+        continue;
+      }
+    }
+
+    // HTML escape
+    if (text[i] == '<') {
+      buffer_append(buf, "&lt;");
+    } else if (text[i] == '>') {
+      buffer_append(buf, "&gt;");
+    } else if (text[i] == '&') {
+      buffer_append(buf, "&amp;");
+    } else {
+      buffer_append_char(buf, text[i]);
+    }
+    i++;
+  }
+}
+
+// Append "<hN>" to the buffer, or "</hN>" when `closing` is non-zero,
+// where N is the digit character '0' + level.
+static void append_heading_tag(StringBuffer *buf, int level, int closing)
+{
+  char tag[5];
+  size_t pos = 0;
+
+  tag[pos++] = '<';
+  if (closing) {
+    tag[pos++] = '/';
+  }
+  tag[pos++] = 'h';
+  tag[pos++] = (char)('0' + level);
+  tag[pos++] = '>';
+
+  buffer_append_n(buf, tag, pos);
+}
+
+// Copy the line that begins at `start` (everything up to, but not
+// including, the next '\n' or the end of the string) into a freshly
+// allocated NUL-terminated string.
+// *line_end always receives the position of that '\n' / NUL in the input,
+// even when the allocation fails. One trailing '\r' is dropped so that
+// CRLF input does not leak carriage returns into the output.
+// Returns 0 (null) if the allocation fails.
+static char *copy_line(const char *start, const char **line_end)
+{
+  const char *end = start;
+  while (*end && *end != '\n') end++;
+  *line_end = end;
+
+  size_t len = (size_t)(end - start);
+  if (len > 0 && start[len - 1] == '\r') len--;
+
+  char *copy = (char *)malloc(len + 1);
+  if (!copy) return 0;
+
+  memcpy(copy, start, len);
+  copy[len] = '\0';
+  return copy;
+}
+
+// Convert markdown to HTML.
+// Reads the NUL-terminated `markdown` line by line and returns a
+// NUL-terminated HTML string allocated from the bump heap. Block types are
+// tested in this order: heading, fenced code block, blockquote, horizontal
+// rule, unordered list, ordered list, table, and finally paragraph.
+// Returns 0 (null) when `markdown` is null or the output buffer cannot be
+// created. If a line copy cannot be allocated part-way through, the HTML
+// produced so far is returned.
+WASM_EXPORT char *markdown_to_html(const char *markdown)
+{
+  if (!markdown) return 0;
+
+  StringBuffer *buf = buffer_create(4096);
+  if (!buf) return 0;
+
+  // Invariant: after copy_line(), `ptr` rests on the '\n' (or NUL) that
+  // ends the line just copied; the newline itself is consumed separately.
+  const char *ptr = markdown;
+
+  while (*ptr) {
+    const char *line_start = ptr;
+    char *line = copy_line(line_start, &ptr);
+    if (!line) return buf->data;
+
+    // Skip empty lines
+    if (is_empty_line(line)) {
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Headings
+    int heading_level = count_heading_level(line);
+    if (heading_level > 0) {
+      const char *content = skip_whitespace(line);
+      while (*content == '#') content++;
+      content = skip_whitespace(content);
+
+      append_heading_tag(buf, heading_level, 0);
+      process_inline(buf, content, strlen(content));
+      append_heading_tag(buf, heading_level, 1);
+
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Code block: everything up to the closing fence (or the end of the
+    // input) is emitted verbatim, HTML-escaped, one '\n' per line.
+    if (starts_with(line, "```")) {
+      buffer_append(buf, "<pre><code>");
+      if (*ptr == '\n') ptr++;
+
+      while (*ptr) {
+        line_start = ptr;
+        char *code_line = copy_line(line_start, &ptr);
+        if (!code_line) break;
+
+        if (starts_with(code_line, "```")) {
+          if (*ptr == '\n') ptr++;
+          break;
+        }
+
+        for (const char *c = code_line; *c; c++) {
+          if (*c == '<') buffer_append(buf, "&lt;");
+          else if (*c == '>') buffer_append(buf, "&gt;");
+          else if (*c == '&') buffer_append(buf, "&amp;");
+          else buffer_append_char(buf, *c);
+        }
+        buffer_append_char(buf, '\n');
+
+        if (*ptr == '\n') ptr++;
+      }
+
+      buffer_append(buf, "</code></pre>");
+      continue;
+    }
+
+    // Blockquote: consecutive '>' lines are merged into one element, each
+    // line followed by a space.
+    if (starts_with(line, ">")) {
+      buffer_append(buf, "<blockquote>");
+
+      while (1) {
+        const char *content = skip_whitespace(line);
+        if (*content == '>') content++;
+        content = skip_whitespace(content);
+        process_inline(buf, content, strlen(content));
+        buffer_append_char(buf, ' ');
+
+        if (*ptr == '\n') ptr++;
+        if (!*ptr) break;
+
+        line_start = ptr;
+        line = copy_line(line_start, &ptr);
+        if (!line) break;
+
+        if (!starts_with(line, ">")) {
+          ptr = line_start;  // not ours: let the outer loop re-read it
+          break;
+        }
+      }
+
+      buffer_append(buf, "</blockquote>");
+      continue;
+    }
+
+    // Horizontal rule (tested before lists so "* * *" is a rule)
+    if (is_horizontal_rule(line)) {
+      buffer_append(buf, "<hr>");
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Unordered list
+    if (is_unordered_list(line)) {
+      buffer_append(buf, "<ul>");
+
+      while (1) {
+        const char *content = skip_whitespace(line);
+        content += 2;  // marker character and the space after it
+
+        buffer_append(buf, "<li>");
+        process_inline(buf, content, strlen(content));
+        buffer_append(buf, "</li>");
+
+        if (*ptr == '\n') ptr++;
+        if (!*ptr) break;
+
+        line_start = ptr;
+        line = copy_line(line_start, &ptr);
+        if (!line) break;
+
+        if (!is_unordered_list(line)) {
+          ptr = line_start;
+          break;
+        }
+      }
+
+      buffer_append(buf, "</ul>");
+      continue;
+    }
+
+    // Ordered list
+    if (is_ordered_list(line)) {
+      buffer_append(buf, "<ol>");
+
+      while (1) {
+        const char *content = skip_whitespace(line);
+        while (*content && isdigit_c(*content)) content++;
+        if (*content == '.') content++;
+        content = skip_whitespace(content);
+
+        buffer_append(buf, "<li>");
+        process_inline(buf, content, strlen(content));
+        buffer_append(buf, "</li>");
+
+        if (*ptr == '\n') ptr++;
+        if (!*ptr) break;
+
+        line_start = ptr;
+        line = copy_line(line_start, &ptr);
+        if (!line) break;
+
+        if (!is_ordered_list(line)) {
+          ptr = line_start;
+          break;
+        }
+      }
+
+      buffer_append(buf, "</ol>");
+      continue;
+    }
+
+    // Table: a '|' row only starts a table when the following line is a
+    // separator row; otherwise it falls through to the paragraph case.
+    if (is_table_row(line)) {
+      const char *next_line_start = ptr;
+      if (*next_line_start == '\n') next_line_start++;
+
+      const char *next_line_end = next_line_start;
+      char *next_line = copy_line(next_line_start, &next_line_end);
+
+      if (next_line && is_table_separator(next_line)) {
+        buffer_append(buf, "<table>");
+
+        // Header row
+        buffer_append(buf, "<thead>");
+        parse_table_row(buf, line, 1);
+        buffer_append(buf, "</thead>");
+
+        // Continue after the separator line
+        ptr = next_line_end;
+        if (*ptr == '\n') ptr++;
+
+        // Body rows
+        buffer_append(buf, "<tbody>");
+        while (*ptr) {
+          line_start = ptr;
+          line = copy_line(line_start, &ptr);
+          if (!line) break;
+
+          if (!is_table_row(line) || is_empty_line(line)) {
+            ptr = line_start;
+            break;
+          }
+
+          parse_table_row(buf, line, 0);
+          if (*ptr == '\n') ptr++;
+        }
+        buffer_append(buf, "</tbody>");
+
+        buffer_append(buf, "</table>");
+        continue;
+      }
+    }
+
+    // Paragraph: the current line is always consumed (which guarantees
+    // progress); following lines are joined with a space until a blank
+    // line or the start of another block.
+    buffer_append(buf, "<p>");
+
+    while (1) {
+      const char *content = skip_whitespace(line);
+      process_inline(buf, content, strlen(content));
+
+      if (*ptr == '\n') ptr++;
+      if (!*ptr) break;
+
+      line_start = ptr;
+      line = copy_line(line_start, &ptr);
+      if (!line) break;
+
+      if (is_empty_line(line) ||
+          count_heading_level(line) > 0 ||
+          starts_with(line, "```") ||
+          starts_with(line, ">") ||
+          is_horizontal_rule(line) ||
+          is_unordered_list(line) ||
+          is_ordered_list(line) ||
+          is_table_row(line)) {
+        ptr = line_start;
+        break;
+      }
+
+      buffer_append_char(buf, ' ');
+    }
+
+    buffer_append(buf, "</p>");
+  }
+
+  return buf->data;
+}
+
+// Exported length helper for the JS side: returns the number of characters
+// before the terminating NUL, or 0 when `str` is null.
+WASM_EXPORT size_t markdown_strlen(const char *str)
+{
+  if (!str) {
+    return 0;
+  }
+  return strlen(str);
+}