Mercurial
diff markdown_converter/markdown_to_html.c @ 154:bdcc610eeed8
[Markdown Converter][GuiZe] Added markdown converter in C and WASM rule sets. Needs further review, as I haven't looked at it yet. Written by Claude.
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Mon, 12 Jan 2026 09:11:58 -0800 |
| parents | |
| children | cd35e600ae34 |
line wrap: on
line diff
/**
 * Markdown to HTML Converter - C Implementation
 * Supports: headers, bold, italic, strikethrough, inline code, links, images,
 * lists, code blocks, blockquotes, horizontal rules
 *
 * All text taken from the input is HTML-escaped before it is written to the
 * output, including link/image targets and inline code.
 */

/*
 * Pull in the public header when it is reachable. Compilers that provide
 * __has_include may build this file standalone (e.g. a single-file unit test);
 * MDAPI then falls back to an empty definition below.
 */
#ifdef __has_include
#  if __has_include("markdown_to_html.h")
#    include "markdown_to_html.h"
#  endif
#else
#  include "markdown_to_html.h"
#endif

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef MDAPI
#define MDAPI
#endif

#define INITIAL_BUFFER_SIZE 4096

/* ------------------------------------------------------------------------ */
/* Output buffer                                                            */
/* ------------------------------------------------------------------------ */

// Growable, always NUL-terminated string buffer for building HTML output.
// `failed` is sticky: once an allocation fails every later append is a no-op,
// so callers only need to check it once at the end.
typedef struct {
    char *data;
    size_t length;   // bytes in use, excluding the terminating NUL
    size_t capacity; // bytes allocated, always >= length + 1
    int failed;
} StringBuffer;

// Initialise `buf` with room for `initial_capacity` bytes. Returns 1 on
// success, 0 if the allocation failed (buf->data is NULL in that case).
static int buffer_init(StringBuffer *buf, size_t initial_capacity)
{
    if (initial_capacity == 0) {
        initial_capacity = 1; // need room for the NUL; also keeps doubling sane
    }

    buf->data = malloc(initial_capacity);
    buf->length = 0;
    buf->capacity = initial_capacity;
    buf->failed = 0;
    if (!buf->data) {
        return 0;
    }
    buf->data[0] = '\0';
    return 1;
}

// Ensure there is room for `needed` more bytes plus the NUL terminator.
// Returns 1 if the space is available, 0 (and marks the buffer failed) if not.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    if (buf->failed) {
        return 0;
    }
    if (needed >= SIZE_MAX - buf->length) { // length + needed + 1 would wrap
        buf->failed = 1;
        return 0;
    }

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) {
        return 1;
    }

    size_t new_capacity = buf->capacity;
    while (new_capacity < required) {
        if (new_capacity > SIZE_MAX / 2) { // doubling would wrap: take exact size
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = realloc(buf->data, new_capacity);
    if (!new_data) {
        buf->failed = 1; // old block is still valid and is freed by the owner
        return 0;
    }
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append exactly `n` bytes from `str`.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) {
        return;
    }
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append a NUL-terminated string.
static void buffer_append(StringBuffer *buf, const char *str)
{
    buffer_append_n(buf, str, strlen(str));
}

// Append a single character.
static void buffer_append_char(StringBuffer *buf, char c)
{
    buffer_append_n(buf, &c, 1);
}

// Append `len` bytes of `text` with HTML special characters replaced by
// entities. `in_attribute` additionally escapes double quotes so the text is
// safe inside a double-quoted attribute value.
static void buffer_append_escaped(StringBuffer *buf, const char *text,
                                  size_t len, int in_attribute)
{
    size_t run_start = 0; // start of the pending run of ordinary characters

    for (size_t i = 0; i < len; i++) {
        const char *entity = NULL;

        switch (text[i]) {
        case '<': entity = "&lt;"; break;
        case '>': entity = "&gt;"; break;
        case '&': entity = "&amp;"; break;
        case '"':
            if (in_attribute) {
                entity = "&quot;";
            }
            break;
        default:
            break;
        }
        if (!entity) {
            continue;
        }

        buffer_append_n(buf, text + run_start, i - run_start);
        buffer_append(buf, entity);
        run_start = i + 1;
    }
    buffer_append_n(buf, text + run_start, len - run_start);
}

/* ------------------------------------------------------------------------ */
/* Line handling                                                            */
/* ------------------------------------------------------------------------ */

// A view of one input line. `text` points into the caller's markdown string
// and is NOT NUL-terminated at `len`; the newline is excluded.
typedef struct {
    const char *text;
    size_t len;
} Line;

// Return the line starting at *cursor and advance *cursor to the start of the
// following line (or to the terminating NUL).
static Line read_line(const char **cursor)
{
    Line line;
    const char *end = *cursor;

    while (*end && *end != '\n') {
        end++;
    }
    line.text = *cursor;
    line.len = (size_t)(end - *cursor);
    *cursor = (*end == '\n') ? end + 1 : end;
    return line;
}

// Index of the first non-whitespace character at or after `from`
// (line.len if there is none).
static size_t skip_whitespace(Line line, size_t from)
{
    while (from < line.len && isspace((unsigned char)line.text[from])) {
        from++;
    }
    return from;
}

// Check if line starts with pattern (after trimming leading whitespace)
static int starts_with(Line line, const char *pattern)
{
    size_t i = skip_whitespace(line, 0);
    size_t n = strlen(pattern);

    return line.len - i >= n && memcmp(line.text + i, pattern, n) == 0;
}

// Heading level 1-6 if the line is "#"..."######" followed by a space, else 0.
static int count_heading_level(Line line)
{
    size_t i = skip_whitespace(line, 0);
    int level = 0;

    while (level < 6 && i < line.len && line.text[i] == '#') {
        level++;
        i++;
    }
    if (level > 0 && i < line.len && line.text[i] == ' ') {
        return level;
    }
    return 0;
}

// Check if line is empty (only whitespace)
static int is_empty_line(Line line)
{
    return skip_whitespace(line, 0) == line.len;
}

// Check if line is horizontal rule (---, ***, ___): at least three of the same
// marker, optionally separated by whitespace, and nothing else.
static int is_horizontal_rule(Line line)
{
    size_t i = skip_whitespace(line, 0);
    if (i >= line.len) {
        return 0;
    }

    char first = line.text[i];
    if (first != '-' && first != '*' && first != '_') {
        return 0;
    }

    size_t count = 0;
    for (; i < line.len; i++) {
        if (line.text[i] == first) {
            count++;
        } else if (!isspace((unsigned char)line.text[i])) {
            return 0;
        }
    }
    return count >= 3;
}

// Check if line is unordered list item: "-", "*" or "+" followed by a space.
static int is_unordered_list(Line line)
{
    size_t i = skip_whitespace(line, 0);

    if (i + 1 >= line.len) {
        return 0;
    }
    return (line.text[i] == '-' || line.text[i] == '*' || line.text[i] == '+') &&
           line.text[i + 1] == ' ';
}

// Check if line is ordered list item: one or more digits, ".", then a space.
static int is_ordered_list(Line line)
{
    size_t i = skip_whitespace(line, 0);
    size_t digits_start = i;

    while (i < line.len && isdigit((unsigned char)line.text[i])) {
        i++;
    }
    // Require at least one digit so that ". foo" is not taken for a list item.
    return i > digits_start && i + 1 < line.len &&
           line.text[i] == '.' && line.text[i + 1] == ' ';
}

// True if `line` cannot continue a paragraph (blank, or opens another block).
static int starts_new_block(Line line)
{
    return is_empty_line(line) ||
           count_heading_level(line) > 0 ||
           starts_with(line, "```") ||
           starts_with(line, ">") ||
           is_horizontal_rule(line) ||
           is_unordered_list(line) ||
           is_ordered_list(line);
}

/* ------------------------------------------------------------------------ */
/* Inline markup                                                            */
/* ------------------------------------------------------------------------ */

// Index of the first `ch` in text[from, len), or `len` if absent.
static size_t find_char(const char *text, size_t len, size_t from, char ch)
{
    const char *hit = (from < len) ? memchr(text + from, ch, len - from) : NULL;

    return hit ? (size_t)(hit - text) : len;
}

// Index of the first doubled `marker` (e.g. "**") in text[from, len), or `len`.
static size_t find_double(const char *text, size_t len, size_t from, char marker)
{
    for (size_t p = from; p + 1 < len; p++) {
        if (text[p] == marker && text[p + 1] == marker) {
            return p;
        }
    }
    return len;
}

// Parse "[label](target)" where text[open] is the '['. On success stores the
// index of ']' in *label_end and of ')' in *target_end and returns 1; the
// label is text[open+1, *label_end) and the target text[*label_end+2, *target_end).
static int parse_bracket_paren(const char *text, size_t len, size_t open,
                               size_t *label_end, size_t *target_end)
{
    size_t close = find_char(text, len, open + 1, ']');
    if (close + 1 >= len || text[close + 1] != '(') {
        return 0;
    }

    size_t paren = find_char(text, len, close + 2, ')');
    if (paren >= len) {
        return 0;
    }

    *label_end = close;
    *target_end = paren;
    return 1;
}

// Process inline markdown (links, images, bold, strikethrough, italic, code)
// in text[0, len) and append the resulting HTML to `buf`. Anything that is not
// recognised markup is emitted HTML-escaped.
//
// NOTE(review): link/image targets are escaped but their URL scheme is not
// validated, so e.g. "javascript:" URLs pass through. Filter them here or in
// the caller if the markdown comes from untrusted users.
static void process_inline(StringBuffer *buf, const char *text, size_t len)
{
    size_t i = 0;

    while (i < len) {
        char c = text[i];
        int has_next = i + 1 < len;
        size_t label_end;
        size_t target_end;

        // Links: [text](url)
        if (c == '[' && parse_bracket_paren(text, len, i, &label_end, &target_end)) {
            buffer_append(buf, "<a href=\"");
            buffer_append_escaped(buf, text + label_end + 2, target_end - (label_end + 2), 1);
            buffer_append(buf, "\">");
            buffer_append_escaped(buf, text + i + 1, label_end - (i + 1), 0);
            buffer_append(buf, "</a>");
            i = target_end + 1;
            continue;
        }

        // Images: ![alt](url)
        if (c == '!' && has_next && text[i + 1] == '[' &&
            parse_bracket_paren(text, len, i + 1, &label_end, &target_end)) {
            buffer_append(buf, "<img src=\"");
            buffer_append_escaped(buf, text + label_end + 2, target_end - (label_end + 2), 1);
            buffer_append(buf, "\" alt=\"");
            buffer_append_escaped(buf, text + i + 2, label_end - (i + 2), 1);
            buffer_append(buf, "\">");
            i = target_end + 1;
            continue;
        }

        // Bold: **text** or __text__   Strikethrough: ~~text~~
        if (has_next && text[i + 1] == c && (c == '*' || c == '_' || c == '~')) {
            size_t close = find_double(text, len, i + 2, c);

            if (close < len) {
                int strike = (c == '~');

                buffer_append(buf, strike ? "<del>" : "<strong>");
                process_inline(buf, text + i + 2, close - (i + 2));
                buffer_append(buf, strike ? "</del>" : "</strong>");
                i = close + 2;
                continue;
            }
        }

        // Italic: *text* or _text_ (opening marker must not be followed by
        // whitespace, and the span must be non-empty)
        if ((c == '*' || c == '_') && has_next && !isspace((unsigned char)text[i + 1])) {
            size_t close = find_char(text, len, i + 1, c);

            if (close < len && close > i + 1) {
                buffer_append(buf, "<em>");
                process_inline(buf, text + i + 1, close - (i + 1));
                buffer_append(buf, "</em>");
                i = close + 1;
                continue;
            }
        }

        // Inline code: `code` (content is literal, so escape but do not parse)
        if (c == '`') {
            size_t close = find_char(text, len, i + 1, '`');

            if (close < len) {
                buffer_append(buf, "<code>");
                buffer_append_escaped(buf, text + i + 1, close - (i + 1), 0);
                buffer_append(buf, "</code>");
                i = close + 1;
                continue;
            }
        }

        // Ordinary character: HTML escape special characters
        buffer_append_escaped(buf, text + i, 1, 0);
        i++;
    }
}

/* ------------------------------------------------------------------------ */
/* Block emitters                                                           */
/*                                                                          */
/* Each multi-line emitter receives the cursor positioned just after the    */
/* block's first line and returns the cursor positioned just after the      */
/* block's last line.                                                       */
/* ------------------------------------------------------------------------ */

// Headings: # H1, ## H2, etc.
static void emit_heading(StringBuffer *buf, Line line, int level)
{
    char tag[8];
    size_t i = skip_whitespace(line, 0);

    while (i < line.len && line.text[i] == '#') {
        i++;
    }
    i = skip_whitespace(line, i);

    snprintf(tag, sizeof tag, "<h%d>", level);
    buffer_append(buf, tag);
    process_inline(buf, line.text + i, line.len - i);
    snprintf(tag, sizeof tag, "</h%d>", level);
    buffer_append(buf, tag);
}

// Code block: everything up to the closing ``` fence (or end of input) is
// emitted escaped and verbatim, one '\n' per line.
static const char *emit_code_block(StringBuffer *buf, const char *cursor)
{
    buffer_append(buf, "<pre><code>");
    while (*cursor) {
        Line line = read_line(&cursor);

        if (starts_with(line, "```")) {
            break;
        }
        buffer_append_escaped(buf, line.text, line.len, 0);
        buffer_append_char(buf, '\n');
    }
    buffer_append(buf, "</code></pre>");
    return cursor;
}

// Blockquote: consecutive lines starting with '>' are joined, each followed
// by a single space.
static const char *emit_blockquote(StringBuffer *buf, Line line, const char *cursor)
{
    buffer_append(buf, "<blockquote>");
    for (;;) {
        size_t i = skip_whitespace(line, 0);
        if (i < line.len && line.text[i] == '>') {
            i++;
        }
        i = skip_whitespace(line, i);
        process_inline(buf, line.text + i, line.len - i);
        buffer_append_char(buf, ' ');

        const char *lookahead = cursor;
        Line next = read_line(&lookahead);
        if (!starts_with(next, ">")) {
            break; // leave `cursor` on the unconsumed line
        }
        line = next;
        cursor = lookahead;
    }
    buffer_append(buf, "</blockquote>");
    return cursor;
}

// Ordered / unordered list: consecutive item lines of the same kind.
static const char *emit_list(StringBuffer *buf, Line line, const char *cursor, int ordered)
{
    int (*is_item)(Line) = ordered ? is_ordered_list : is_unordered_list;

    buffer_append(buf, ordered ? "<ol>" : "<ul>");
    for (;;) {
        size_t i = skip_whitespace(line, 0);

        if (ordered) {
            while (i < line.len && isdigit((unsigned char)line.text[i])) {
                i++;
            }
            if (i < line.len && line.text[i] == '.') {
                i++;
            }
            i = skip_whitespace(line, i);
        } else {
            i += 2; // Skip "- " or "* " or "+ "
        }

        buffer_append(buf, "<li>");
        process_inline(buf, line.text + i, line.len - i);
        buffer_append(buf, "</li>");

        const char *lookahead = cursor;
        Line next = read_line(&lookahead);
        if (!is_item(next)) {
            break;
        }
        line = next;
        cursor = lookahead;
    }
    buffer_append(buf, ordered ? "</ol>" : "</ul>");
    return cursor;
}

// Regular paragraph: lines are joined with a single space until a blank line
// or a line that opens another block.
static const char *emit_paragraph(StringBuffer *buf, Line line, const char *cursor)
{
    buffer_append(buf, "<p>");
    for (;;) {
        size_t i = skip_whitespace(line, 0);
        process_inline(buf, line.text + i, line.len - i);

        const char *lookahead = cursor;
        Line next = read_line(&lookahead);
        if (starts_new_block(next)) { // also true at end of input (empty line)
            break;
        }
        buffer_append_char(buf, ' ');
        line = next;
        cursor = lookahead;
    }
    buffer_append(buf, "</p>");
    return cursor;
}

/* ------------------------------------------------------------------------ */
/* Public API                                                               */
/* ------------------------------------------------------------------------ */

/**
 * Convert markdown to HTML.
 *
 * @param markdown  NUL-terminated markdown text; may be NULL.
 * @return Newly allocated NUL-terminated HTML string that the caller must
 *         release with markdown_free(), or NULL if `markdown` is NULL or
 *         memory ran out (no partial output is ever returned).
 */
MDAPI char *markdown_to_html(const char *markdown)
{
    if (!markdown) {
        return NULL;
    }

    StringBuffer buf;
    if (!buffer_init(&buf, INITIAL_BUFFER_SIZE)) {
        return NULL;
    }

    const char *cursor = markdown;
    while (*cursor && !buf.failed) {
        Line line = read_line(&cursor);
        int heading_level;

        if (is_empty_line(line)) {
            continue;
        }

        // Order matters: "---"/"***" must be tested before list items.
        heading_level = count_heading_level(line);
        if (heading_level > 0) {
            emit_heading(&buf, line, heading_level);
        } else if (starts_with(line, "```")) {
            cursor = emit_code_block(&buf, cursor);
        } else if (starts_with(line, ">")) {
            cursor = emit_blockquote(&buf, line, cursor);
        } else if (is_horizontal_rule(line)) {
            buffer_append(&buf, "<hr>");
        } else if (is_unordered_list(line)) {
            cursor = emit_list(&buf, line, cursor, 0);
        } else if (is_ordered_list(line)) {
            cursor = emit_list(&buf, line, cursor, 1);
        } else {
            cursor = emit_paragraph(&buf, line, cursor);
        }
    }

    if (buf.failed) {
        free(buf.data);
        return NULL;
    }
    return buf.data; // ownership passes to the caller
}

/**
 * Free an HTML string returned by markdown_to_html(). NULL is accepted.
 */
MDAPI void markdown_free(char *html)
{
    free(html);
}

/**
 * Get length of HTML string in bytes, excluding the terminating NUL
 * (for WASM memory allocation). Returns 0 for NULL.
 */
MDAPI size_t markdown_get_length(const char *html)
{
    return html ? strlen(html) : 0;
}