diff markdown_converter/markdown_to_html.c @ 154:bdcc610eeed8

[Markdown Converter][GuiZe] Added markdown converter in C and wasm rule sets. Needs further review, as I haven't taken a look at it yet. Written by Claude.
author June Park <parkjune1995@gmail.com>
date Mon, 12 Jan 2026 09:11:58 -0800
parents
children cd35e600ae34
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/markdown_converter/markdown_to_html.c	Mon Jan 12 09:11:58 2026 -0800
@@ -0,0 +1,545 @@
+/**
+ * Markdown to HTML Converter - C Implementation
+ * Supports: headings, bold, italic, strikethrough, inline code, links, images, lists, code blocks, blockquotes, horizontal rules
+ */
+
+#include "markdown_to_html.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+
+// Starting capacity, in bytes, of the output buffer created by markdown_to_html().
+#define INITIAL_BUFFER_SIZE 4096
+
+// String buffer for building HTML output.
+// The buffer_append* helpers leave `data` NUL-terminated after every write.
+typedef struct {
+  char  *data;     // heap-allocated character storage
+  size_t length;   // bytes currently stored, not counting the terminating NUL
+  size_t capacity; // bytes allocated for `data`
+} StringBuffer;
+
+// Allocate an empty string buffer.
+//
+// initial_capacity: number of bytes to reserve for the character data; a
+//                   value of 0 is raised to 1 so the terminating NUL fits.
+// Returns a new buffer holding the empty string, or NULL if either
+// allocation fails. Release it with buffer_free().
+static StringBuffer *buffer_create(size_t initial_capacity)
+{
+  // A zero-byte allocation would make the data[0] store below write out of
+  // bounds, and a capacity of 0 can never grow by doubling.
+  if (initial_capacity == 0) initial_capacity = 1;
+
+  StringBuffer *buf = malloc(sizeof *buf);
+  if (!buf) return NULL;
+
+  buf->data = malloc(initial_capacity);
+  if (!buf->data) {
+    free(buf);
+    return NULL;
+  }
+  buf->data[0] = '\0';
+  buf->length = 0;
+  buf->capacity = initial_capacity;
+  return buf;
+}
+
+// Make sure `buf` can hold `needed` more bytes plus the terminating NUL.
+//
+// Capacity grows by doubling. Returns 0 when the space is available and -1
+// when the request would overflow size_t or realloc() fails; on failure the
+// buffer is left exactly as it was, so the caller must not write into it.
+static int buffer_grow(StringBuffer *buf, size_t needed)
+{
+  const size_t max_size = (size_t)-1;
+
+  // length + needed + 1 must be representable in size_t.
+  if (needed >= max_size - buf->length) return -1;
+
+  size_t required = buf->length + needed + 1;
+  if (required <= buf->capacity) return 0;
+
+  // Start from at least 1 so that doubling always makes progress.
+  size_t new_capacity = buf->capacity ? buf->capacity : 1;
+  while (new_capacity < required) {
+    if (new_capacity > max_size / 2) {
+      // Doubling again would wrap around; fall back to the exact size.
+      new_capacity = required;
+      break;
+    }
+    new_capacity *= 2;
+  }
+
+  char *new_data = realloc(buf->data, new_capacity);
+  if (!new_data) return -1; // buf->data is still valid and unchanged
+
+  buf->data = new_data;
+  buf->capacity = new_capacity;
+  return 0;
+}
+
+// Append the NUL-terminated string `str` to `buf`.
+// Returns 0 on success, or -1 (leaving `buf` unchanged) if it cannot grow.
+static int buffer_append(StringBuffer *buf, const char *str)
+{
+  size_t len = strlen(str);
+  if (buffer_grow(buf, len) != 0) return -1;
+
+  // len + 1 copies the terminator along with the characters.
+  memcpy(buf->data + buf->length, str, len + 1);
+  buf->length += len;
+  return 0;
+}
+
+// Append exactly `n` bytes starting at `str` to `buf` and re-terminate it.
+// Returns 0 on success, or -1 (leaving `buf` unchanged) if it cannot grow.
+static int buffer_append_n(StringBuffer *buf, const char *str, size_t n)
+{
+  if (buffer_grow(buf, n) != 0) return -1;
+
+  memcpy(buf->data + buf->length, str, n);
+  buf->length += n;
+  buf->data[buf->length] = '\0';
+  return 0;
+}
+
+// Append the single character `c` to `buf` and re-terminate it.
+// Returns 0 on success, or -1 (leaving `buf` unchanged) if it cannot grow.
+static int buffer_append_char(StringBuffer *buf, char c)
+{
+  if (buffer_grow(buf, 1) != 0) return -1;
+
+  buf->data[buf->length++] = c;
+  buf->data[buf->length] = '\0';
+  return 0;
+}
+
+// Release a buffer together with its character storage.
+// Passing NULL is allowed and does nothing.
+static void buffer_free(StringBuffer *buf)
+{
+  if (buf == NULL) return;
+
+  free(buf->data);
+  free(buf);
+}
+
+// Report whether `line`, ignoring any leading whitespace, begins with
+// `pattern`. Returns 1 on a match and 0 otherwise.
+static int starts_with(const char *line, const char *pattern)
+{
+  const char *cursor = line;
+  for (; *cursor != '\0'; cursor++) {
+    if (!isspace((unsigned char)*cursor)) break;
+  }
+
+  size_t pattern_len = strlen(pattern);
+  return strncmp(cursor, pattern, pattern_len) == 0;
+}
+
+// Determine the heading level of `line`.
+// After optional leading whitespace, a heading is one to six '#' characters
+// followed by a single space. Returns that count, or 0 when the line is not
+// a heading.
+static int count_heading_level(const char *line)
+{
+  const char *p = line;
+  while (*p != '\0' && isspace((unsigned char)*p)) p++;
+
+  int level = 0;
+  while (level < 6 && p[level] == '#') level++;
+
+  if (level == 0 || p[level] != ' ') return 0;
+  return level;
+}
+
+// Return a pointer to the first non-whitespace character of `str`, or to its
+// terminating NUL when the string is entirely whitespace.
+static const char *skip_whitespace(const char *str)
+{
+  const char *cursor;
+  for (cursor = str; *cursor != '\0'; cursor++) {
+    if (!isspace((unsigned char)*cursor)) break;
+  }
+  return cursor;
+}
+
+// Report whether `line` holds nothing but whitespace (the empty string
+// counts). Returns 1 if so, 0 otherwise.
+static int is_empty_line(const char *line)
+{
+  // isspace('\0') is false, so the scan stops at the terminator at the latest.
+  while (isspace((unsigned char)*line)) line++;
+  return *line == '\0';
+}
+
+// Report whether `line` is a horizontal rule: after leading whitespace it
+// must consist only of one repeated marker ('-', '*' or '_') and whitespace,
+// with the marker occurring at least three times.
+static int is_horizontal_rule(const char *line)
+{
+  const char *p = skip_whitespace(line);
+  const char rule_char = *p;
+
+  switch (rule_char) {
+  case '-':
+  case '*':
+  case '_':
+    break;
+  default:
+    return 0;
+  }
+
+  int marks = 0;
+  for (; *p != '\0'; p++) {
+    if (*p == rule_char) {
+      marks++;
+      continue;
+    }
+    // Anything other than the marker or whitespace disqualifies the line.
+    if (!isspace((unsigned char)*p)) return 0;
+  }
+  return marks >= 3;
+}
+
+// Report whether `line` is a bullet item: optional leading whitespace, one of
+// '-', '*' or '+', then a single space.
+static int is_unordered_list(const char *line)
+{
+  const char *p = skip_whitespace(line);
+
+  switch (*p) {
+  case '-':
+  case '*':
+  case '+':
+    return p[1] == ' ';
+  default:
+    return 0;
+  }
+}
+
+// Report whether `line` is a numbered list item: optional leading
+// whitespace, one or more decimal digits, a '.', then a single space.
+// Returns 1 if so, 0 otherwise.
+static int is_ordered_list(const char *line)
+{
+  const char *p = skip_whitespace(line);
+  const char *digits = p;
+
+  while (isdigit((unsigned char)*p)) p++;
+
+  // Without at least one digit a line such as ". text" is ordinary prose.
+  if (p == digits) return 0;
+
+  return p[0] == '.' && p[1] == ' ';
+}
+
+// Append text[0..len) to `buf`, replacing '<', '>' and '&' with HTML
+// entities. When `in_attribute` is non-zero, '"' is also replaced so the text
+// cannot terminate a double-quoted attribute value.
+static void md_append_escaped(StringBuffer *buf, const char *text, size_t len, int in_attribute)
+{
+  size_t run_start = 0; // start of the pending run of characters needing no escape
+
+  for (size_t k = 0; k < len; k++) {
+    const char *entity = NULL;
+
+    switch (text[k]) {
+    case '<': entity = "&lt;"; break;
+    case '>': entity = "&gt;"; break;
+    case '&': entity = "&amp;"; break;
+    case '"': if (in_attribute) entity = "&quot;"; break;
+    default: break;
+    }
+    if (!entity) continue;
+
+    // Flush the literal run preceding this character, then the entity.
+    if (k > run_start) buffer_append_n(buf, text + run_start, k - run_start);
+    buffer_append(buf, entity);
+    run_start = k + 1;
+  }
+
+  if (len > run_start) buffer_append_n(buf, text + run_start, len - run_start);
+}
+
+// Process inline markdown (links, images, bold, strikethrough, italic, code)
+// in text[0..len) and append the resulting HTML to `buf`.
+//
+// At each position the constructs are tried in the order listed above; the
+// first one that finds its closing delimiter wins. Bold, strikethrough and
+// italic content is processed recursively. Link text, image alt text, URLs
+// and inline-code content are HTML-escaped and emitted without further
+// markdown processing. A character that starts no construct is emitted
+// HTML-escaped.
+//
+// NOTE(review): URL schemes are not validated, so a "javascript:" link in
+// untrusted input is still emitted as-is (only HTML-escaped).
+static void process_inline(StringBuffer *buf, const char *text, size_t len)
+{
+  size_t i = 0;
+
+  while (i < len) {
+    // Links: [text](url)
+    if (text[i] == '[') {
+      size_t link_start = i + 1;
+      size_t link_end = link_start;
+      while (link_end < len && text[link_end] != ']') link_end++;
+
+      if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
+        size_t url_start = link_end + 2;
+        size_t url_end = url_start;
+        while (url_end < len && text[url_end] != ')') url_end++;
+
+        if (url_end < len) {
+          buffer_append(buf, "<a href=\"");
+          md_append_escaped(buf, text + url_start, url_end - url_start, 1);
+          buffer_append(buf, "\">");
+          md_append_escaped(buf, text + link_start, link_end - link_start, 0);
+          buffer_append(buf, "</a>");
+          i = url_end + 1;
+          continue;
+        }
+      }
+    }
+
+    // Images: ![alt](url)
+    if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
+      size_t alt_start = i + 2;
+      size_t alt_end = alt_start;
+      while (alt_end < len && text[alt_end] != ']') alt_end++;
+
+      if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
+        size_t url_start = alt_end + 2;
+        size_t url_end = url_start;
+        while (url_end < len && text[url_end] != ')') url_end++;
+
+        if (url_end < len) {
+          buffer_append(buf, "<img src=\"");
+          md_append_escaped(buf, text + url_start, url_end - url_start, 1);
+          buffer_append(buf, "\" alt=\"");
+          md_append_escaped(buf, text + alt_start, alt_end - alt_start, 1);
+          buffer_append(buf, "\">");
+          i = url_end + 1;
+          continue;
+        }
+      }
+    }
+
+    // Bold: **text** or __text__
+    if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
+        (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
+      char marker = text[i];
+      size_t start = i + 2;
+      size_t end = start;
+      while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
+
+      if (end + 1 < len) {
+        buffer_append(buf, "<strong>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</strong>");
+        i = end + 2;
+        continue;
+      }
+    }
+
+    // Strikethrough: ~~text~~
+    if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
+      size_t start = i + 2;
+      size_t end = start;
+      while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
+
+      if (end + 1 < len) {
+        buffer_append(buf, "<del>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</del>");
+        i = end + 2;
+        continue;
+      }
+    }
+
+    // Italic: *text* or _text_ (the opening marker must not be followed by whitespace)
+    if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) {
+      char marker = text[i];
+      size_t start = i + 1;
+      size_t end = start;
+      while (end < len && text[end] != marker) end++;
+
+      // end > start rejects an empty span such as the "**" of an unclosed bold.
+      if (end < len && end > start) {
+        buffer_append(buf, "<em>");
+        process_inline(buf, text + start, end - start);
+        buffer_append(buf, "</em>");
+        i = end + 1;
+        continue;
+      }
+    }
+
+    // Inline code: `code` (content is escaped but not parsed as markdown)
+    if (text[i] == '`') {
+      size_t start = i + 1;
+      size_t end = start;
+      while (end < len && text[end] != '`') end++;
+
+      if (end < len) {
+        buffer_append(buf, "<code>");
+        md_append_escaped(buf, text + start, end - start, 0);
+        buffer_append(buf, "</code>");
+        i = end + 1;
+        continue;
+      }
+    }
+
+    // No construct matched here: emit the character itself, HTML-escaped.
+    md_append_escaped(buf, text + i, 1, 0);
+    i++;
+  }
+}
+
+// Copy the line starting at *cursor into a fresh NUL-terminated string and
+// advance *cursor to that line's terminator ('\n' or the final NUL).
+// Returns NULL, leaving *cursor untouched, if the allocation fails.
+static char *md_copy_line(const char **cursor)
+{
+  const char *start = *cursor;
+  const char *end = start;
+  while (*end && *end != '\n') end++;
+
+  size_t len = (size_t)(end - start);
+  char *line = malloc(len + 1);
+  if (!line) return NULL;
+
+  memcpy(line, start, len);
+  line[len] = '\0';
+  *cursor = end;
+  return line;
+}
+
+// Blockquote membership test: the line starts with '>' after whitespace.
+static int md_is_quote_line(const char *line)
+{
+  return starts_with(line, ">");
+}
+
+// Paragraph membership test: the line is neither blank nor the start of any
+// other block (heading, code fence, blockquote, rule, list item).
+static int md_continues_paragraph(const char *line)
+{
+  return !(is_empty_line(line) ||
+           count_heading_level(line) > 0 ||
+           starts_with(line, "```") ||
+           starts_with(line, ">") ||
+           is_horizontal_rule(line) ||
+           is_unordered_list(line) ||
+           is_ordered_list(line));
+}
+
+// Finish with the current line of a multi-line block and look at the next.
+//
+// Frees *line, steps *ptr over the pending '\n', and reads the following
+// line. Returns 1 with *line set to that line (and *ptr at its terminator)
+// when belongs() accepts it; returns 0 with *line == NULL and *ptr at the
+// start of the unconsumed line when the input ended or the line belongs to
+// another block; returns -1 with *line == NULL on allocation failure.
+static int md_advance_block(const char **ptr, char **line, int (*belongs)(const char *))
+{
+  free(*line);
+  *line = NULL;
+  if (**ptr == '\n') (*ptr)++;
+  if (!**ptr) return 0;
+
+  const char *line_start = *ptr;
+  char *next = md_copy_line(ptr);
+  if (!next) return -1;
+
+  if (!belongs(next)) {
+    // Put the line back so the caller's main loop sees it again.
+    *ptr = line_start;
+    free(next);
+    return 0;
+  }
+
+  *line = next;
+  return 1;
+}
+
+// Convert markdown to HTML.
+//
+// The input is read line by line and each block is emitted in turn:
+// headings, fenced code blocks, blockquotes, horizontal rules, unordered and
+// ordered lists, and paragraphs. Blank lines separate blocks and produce no
+// output. Inline markup is handled by process_inline().
+//
+// markdown: NUL-terminated markdown text; may be NULL.
+// Returns a heap-allocated, NUL-terminated HTML string that the caller must
+// release with markdown_free(), or NULL if `markdown` is NULL or a line or
+// the buffer header cannot be allocated.
+MDAPI char *markdown_to_html(const char *markdown)
+{
+  if (!markdown) return NULL;
+
+  StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE);
+  if (!buf) return NULL;
+
+  const char *ptr = markdown;
+
+  while (*ptr) {
+    char *line = md_copy_line(&ptr);
+    if (!line) {
+      buffer_free(buf);
+      return NULL;
+    }
+
+    // Skip empty lines
+    if (is_empty_line(line)) {
+      free(line);
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Headings: # H1, ## H2, etc.
+    int heading_level = count_heading_level(line);
+    if (heading_level > 0) {
+      const char *content = skip_whitespace(line);
+      while (*content == '#') content++;
+      content = skip_whitespace(content);
+
+      char tag[8]; // large enough for "</h6>" plus the terminator
+      snprintf(tag, sizeof(tag), "<h%d>", heading_level);
+      buffer_append(buf, tag);
+      process_inline(buf, content, strlen(content));
+      snprintf(tag, sizeof(tag), "</h%d>", heading_level);
+      buffer_append(buf, tag);
+
+      free(line);
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Code block: ``` (anything after the opening fence on that line is ignored)
+    if (starts_with(line, "```")) {
+      buffer_append(buf, "<pre><code>");
+      free(line);
+      if (*ptr == '\n') ptr++;
+
+      // Collect code content until the closing fence or the end of input
+      while (*ptr) {
+        line = md_copy_line(&ptr);
+        if (!line) {
+          buffer_free(buf);
+          return NULL;
+        }
+
+        if (starts_with(line, "```")) {
+          free(line);
+          if (*ptr == '\n') ptr++;
+          break;
+        }
+
+        // Escape HTML in code blocks
+        size_t code_len = strlen(line);
+        for (size_t i = 0; i < code_len; i++) {
+          if (line[i] == '<') buffer_append(buf, "&lt;");
+          else if (line[i] == '>') buffer_append(buf, "&gt;");
+          else if (line[i] == '&') buffer_append(buf, "&amp;");
+          else buffer_append_char(buf, line[i]);
+        }
+        buffer_append_char(buf, '\n');
+
+        free(line);
+        if (*ptr == '\n') ptr++;
+      }
+
+      buffer_append(buf, "</code></pre>");
+      continue;
+    }
+
+    // Blockquote: > (consecutive quote lines are joined with spaces)
+    if (starts_with(line, ">")) {
+      int more;
+      buffer_append(buf, "<blockquote>");
+
+      do {
+        const char *content = skip_whitespace(line);
+        if (*content == '>') content++;
+        content = skip_whitespace(content);
+        process_inline(buf, content, strlen(content));
+        buffer_append_char(buf, ' ');
+
+        more = md_advance_block(&ptr, &line, md_is_quote_line);
+      } while (more > 0);
+
+      if (more < 0) {
+        buffer_free(buf);
+        return NULL;
+      }
+
+      buffer_append(buf, "</blockquote>");
+      continue;
+    }
+
+    // Horizontal rule (checked before lists so "* * *" is a rule, not a bullet)
+    if (is_horizontal_rule(line)) {
+      buffer_append(buf, "<hr>");
+      free(line);
+      if (*ptr == '\n') ptr++;
+      continue;
+    }
+
+    // Unordered list
+    if (is_unordered_list(line)) {
+      int more;
+      buffer_append(buf, "<ul>");
+
+      do {
+        const char *content = skip_whitespace(line);
+        content += 2; // Skip "- " or "* " or "+ "
+
+        buffer_append(buf, "<li>");
+        process_inline(buf, content, strlen(content));
+        buffer_append(buf, "</li>");
+
+        more = md_advance_block(&ptr, &line, is_unordered_list);
+      } while (more > 0);
+
+      if (more < 0) {
+        buffer_free(buf);
+        return NULL;
+      }
+
+      buffer_append(buf, "</ul>");
+      continue;
+    }
+
+    // Ordered list
+    if (is_ordered_list(line)) {
+      int more;
+      buffer_append(buf, "<ol>");
+
+      do {
+        const char *content = skip_whitespace(line);
+        while (*content && isdigit((unsigned char)*content)) content++;
+        if (*content == '.') content++;
+        content = skip_whitespace(content);
+
+        buffer_append(buf, "<li>");
+        process_inline(buf, content, strlen(content));
+        buffer_append(buf, "</li>");
+
+        more = md_advance_block(&ptr, &line, is_ordered_list);
+      } while (more > 0);
+
+      if (more < 0) {
+        buffer_free(buf);
+        return NULL;
+      }
+
+      buffer_append(buf, "</ol>");
+      continue;
+    }
+
+    // Regular paragraph (consecutive plain lines are joined with spaces)
+    buffer_append(buf, "<p>");
+
+    for (;;) {
+      const char *content = skip_whitespace(line);
+      process_inline(buf, content, strlen(content));
+
+      int more = md_advance_block(&ptr, &line, md_continues_paragraph);
+      if (more < 0) {
+        buffer_free(buf);
+        return NULL;
+      }
+      if (more == 0) break;
+
+      buffer_append_char(buf, ' ');
+    }
+
+    buffer_append(buf, "</p>");
+  }
+
+  // Hand the character data to the caller; only the header struct is freed.
+  char *result = buf->data;
+  free(buf);
+  return result;
+}
+
+// Free the returned HTML string.
+// `html` is passed straight to free(), so it must be a pointer obtained from
+// markdown_to_html() (or NULL, which free() ignores).
+MDAPI void markdown_free(char *html)
+{
+  free(html);
+}
+
+// Get length of HTML string (for WASM memory allocation).
+// Returns the number of bytes before the terminating NUL, or 0 when `html`
+// is NULL.
+MDAPI size_t markdown_get_length(const char *html)
+{
+  if (html == NULL) {
+    return 0;
+  }
+  return strlen(html);
+}