Mercurial
view markdown_converter/markdown_to_html.c @ 176:fed99fc04e12 hg-web
[HgWeb] Problem with the emscript lol
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Wed, 21 Jan 2026 19:32:08 -0800 |
| parents | 1c0878eb17de |
| children | 8c74204fd362 |
line wrap: on
line source
#include <string.h> #include <stdlib.h> #include <stdio.h> #include <ctype.h> #include "markdown_converter/markdown_to_html.h" #define INITIAL_BUFFER_SIZE 1024 * 1024 // 1MB // String buffer for building HTML output typedef struct { char *data; size_t length; size_t capacity; } StringBuffer; static StringBuffer *buffer_create(size_t initial_capacity) { StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer)); if (!buf) return NULL; buf->data = (char *)malloc(initial_capacity); if (!buf->data) { free(buf); return NULL; } buf->data[0] = '\0'; buf->length = 0; buf->capacity = initial_capacity; return buf; } static void buffer_grow(StringBuffer *buf, size_t needed) { if (buf->length + needed + 1 > buf->capacity) { size_t new_capacity = buf->capacity * 2; while (new_capacity < buf->length + needed + 1) new_capacity *= 2; char *new_data = (char *)realloc(buf->data, new_capacity); if (new_data) { buf->data = new_data; buf->capacity = new_capacity; } } } static void buffer_append(StringBuffer *buf, const char *str) { size_t len = strlen(str); buffer_grow(buf, len); memcpy(buf->data + buf->length, str, len + 1); buf->length += len; } static void buffer_append_n(StringBuffer *buf, const char *str, size_t n) { buffer_grow(buf, n); memcpy(buf->data + buf->length, str, n); buf->length += n; buf->data[buf->length] = '\0'; } static void buffer_append_char(StringBuffer *buf, char c) { buffer_grow(buf, 1); buf->data[buf->length++] = c; buf->data[buf->length] = '\0'; } static void buffer_free(StringBuffer *buf) { if (buf) { free(buf->data); free(buf); } } // Check if line starts with pattern (after trimming whitespace) static int starts_with(const char *line, const char *pattern) { while (*line && isspace((unsigned char)*line)) line++; return strncmp(line, pattern, strlen(pattern)) == 0; } // Count leading # characters static int count_heading_level(const char *line) { int count = 0; while (*line && isspace((unsigned char)*line)) line++; while (line[count] == '#' && count < 6) count++; if (count > 0 && line[count] == ' ') return count; return 0; } // Skip whitespace static const char *skip_whitespace(const char *str) { while (*str && isspace((unsigned char)*str)) str++; return str; } // Check if line is empty (only whitespace) static int is_empty_line(const char *line) { while (*line) { if (!isspace((unsigned char)*line)) return 0; line++; } return 1; } // Check if line is horizontal rule (---, ***, ___) static int is_horizontal_rule(const char *line) { line = skip_whitespace(line); char first = *line; if (first != '-' && first != '*' && first != '_') return 0; int count = 0; while (*line) { if (*line == first) count++; else if (!isspace((unsigned char)*line)) return 0; line++; } return count >= 3; } // Check if line is unordered list item static int is_unordered_list(const char *line) { line = skip_whitespace(line); return (*line == '-' || *line == '*' || *line == '+') && line[1] == ' '; } // Check if line is ordered list item static int is_ordered_list(const char *line) { line = skip_whitespace(line); while (*line && isdigit((unsigned char)*line)) line++; return *line == '.' && line[1] == ' '; } // Process inline markdown (bold, italic, code, links, strikethrough) static void process_inline(StringBuffer *buf, const char *text, size_t len) { size_t i = 0; while (i < len) { // Links: [text](url) if (text[i] == '[') { size_t link_start = i + 1; size_t link_end = link_start; while (link_end < len && text[link_end] != ']') link_end++; if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') { size_t url_start = link_end + 2; size_t url_end = url_start; while (url_end < len && text[url_end] != ')') url_end++; if (url_end < len) { buffer_append(buf, "<a href=\""); buffer_append_n(buf, text + url_start, url_end - url_start); buffer_append(buf, "\">"); buffer_append_n(buf, text + link_start, link_end - link_start); buffer_append(buf, "</a>"); i = url_end + 1; continue; } } } // Images:  if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') { size_t alt_start = i + 2; size_t alt_end = alt_start; while (alt_end < len && text[alt_end] != ']') alt_end++; if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') { size_t url_start = alt_end + 2; size_t url_end = url_start; while (url_end < len && text[url_end] != ')') url_end++; if (url_end < len) { buffer_append(buf, "<img src=\""); buffer_append_n(buf, text + url_start, url_end - url_start); buffer_append(buf, "\" alt=\""); buffer_append_n(buf, text + alt_start, alt_end - alt_start); buffer_append(buf, "\">"); i = url_end + 1; continue; } } } // Bold: **text** or __text__ if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') || (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) { char marker = text[i]; size_t start = i + 2; size_t end = start; while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++; if (end + 1 < len) { buffer_append(buf, "<strong>"); process_inline(buf, text + start, end - start); buffer_append(buf, "</strong>"); i = end + 2; continue; } } // Strikethrough: ~~text~~ if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') { size_t start = i + 2; size_t end = start; while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++; if (end + 1 < len) { buffer_append(buf, "<del>"); process_inline(buf, text + start, end - start); buffer_append(buf, "</del>"); i = end + 2; continue; } } // Italic: *text* or _text_ if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) { char marker = text[i]; size_t start = i + 1; size_t end = start; while (end < len && text[end] != marker) end++; if (end < len && end > start) { buffer_append(buf, "<em>"); process_inline(buf, text + start, end - start); buffer_append(buf, "</em>"); i = end + 1; continue; } } // Inline code: `code` if (text[i] == '`') { size_t start = i + 1; size_t end = start; while (end < len && text[end] != '`') end++; if (end < len) { buffer_append(buf, "<code>"); buffer_append_n(buf, text + start, end - start); buffer_append(buf, "</code>"); i = end + 1; continue; } } // This might not be needed for now. // HTML escape special characters // if (text[i] == '<') { // buffer_append(buf, "<"); // } else if (text[i] == '>') { // buffer_append(buf, ">"); // } else if (text[i] == '&') { // buffer_append(buf, "&"); // } else { // buffer_append_char(buf, text[i]); // } buffer_append_char(buf, text[i]); i++; } } // Convert markdown to HTML MDAPI char *markdown_to_html(const char *markdown) { if (!markdown) return NULL; StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE); if (!buf) return NULL; const char *ptr = markdown; const char *line_start; while (*ptr) { line_start = ptr; // Find end of line while (*ptr && *ptr != '\n') ptr++; size_t line_len = ptr - line_start; // Create null-terminated line copy char *line = (char *)malloc(line_len + 1); if (!line) { buffer_free(buf); return NULL; } memcpy(line, line_start, line_len); line[line_len] = '\0'; // Skip empty lines if (is_empty_line(line)) { free(line); if (*ptr == '\n') ptr++; continue; } // Headings: # H1, ## H2, etc. int heading_level = count_heading_level(line); if (heading_level > 0) { const char *content = skip_whitespace(line); while (*content == '#') content++; content = skip_whitespace(content); char tag[8]; snprintf(tag, sizeof(tag), "<h%d>", heading_level); buffer_append(buf, tag); process_inline(buf, content, strlen(content)); snprintf(tag, sizeof(tag), "</h%d>", heading_level); buffer_append(buf, tag); free(line); if (*ptr == '\n') ptr++; continue; } // Code block: ``` if (starts_with(line, "```")) { buffer_append(buf, "<pre><code>"); free(line); if (*ptr == '\n') ptr++; // Collect code content while (*ptr) { line_start = ptr; while (*ptr && *ptr != '\n') ptr++; line_len = ptr - line_start; line = (char *)malloc(line_len + 1); if (!line) break; memcpy(line, line_start, line_len); line[line_len] = '\0'; if (starts_with(line, "```")) { free(line); if (*ptr == '\n') ptr++; break; } // Escape HTML in code blocks for (size_t i = 0; i < line_len; i++) { if (line[i] == '<') buffer_append(buf, "<"); else if (line[i] == '>') buffer_append(buf, ">"); else if (line[i] == '&') buffer_append(buf, "&"); else buffer_append_char(buf, line[i]); } buffer_append_char(buf, '\n'); free(line); if (*ptr == '\n') ptr++; } buffer_append(buf, "</code></pre>"); continue; } // Blockquote: > if (starts_with(line, ">")) { buffer_append(buf, "<blockquote>"); while (1) { const char *content = skip_whitespace(line); if (*content == '>') content++; content = skip_whitespace(content); process_inline(buf, content, strlen(content)); buffer_append_char(buf, ' '); free(line); if (*ptr == '\n') ptr++; // Check next line if (!*ptr) break; line_start = ptr; while (*ptr && *ptr != '\n') ptr++; line_len = ptr - line_start; line = (char *)malloc(line_len + 1); if (!line) break; memcpy(line, line_start, line_len); line[line_len] = '\0'; if (!starts_with(line, ">")) { // Put back the line pointer ptr = line_start; free(line); break; } } buffer_append(buf, "</blockquote>"); continue; } // Horizontal rule if (is_horizontal_rule(line)) { buffer_append(buf, "<hr>"); free(line); if (*ptr == '\n') ptr++; continue; } // Unordered list if (is_unordered_list(line)) { buffer_append(buf, "<ul>"); while (1) { const char *content = skip_whitespace(line); content += 2; // Skip "- " or "* " or "+ " buffer_append(buf, "<li>"); process_inline(buf, content, strlen(content)); buffer_append(buf, "</li>"); free(line); if (*ptr == '\n') ptr++; // Check next line if (!*ptr) break; line_start = ptr; while (*ptr && *ptr != '\n') ptr++; line_len = ptr - line_start; line = (char *)malloc(line_len + 1); if (!line) break; memcpy(line, line_start, line_len); line[line_len] = '\0'; if (!is_unordered_list(line)) { ptr = line_start; free(line); break; } } buffer_append(buf, "</ul>"); continue; } // Ordered list if (is_ordered_list(line)) { buffer_append(buf, "<ol>"); while (1) { const char *content = skip_whitespace(line); while (*content && isdigit((unsigned char)*content)) content++; if (*content == '.') content++; content = skip_whitespace(content); buffer_append(buf, "<li>"); process_inline(buf, content, strlen(content)); buffer_append(buf, "</li>"); free(line); if (*ptr == '\n') ptr++; // Check next line if (!*ptr) break; line_start = ptr; while (*ptr && *ptr != '\n') ptr++; line_len = ptr - line_start; line = (char *)malloc(line_len + 1); if (!line) break; memcpy(line, line_start, line_len); line[line_len] = '\0'; if (!is_ordered_list(line)) { ptr = line_start; free(line); break; } } buffer_append(buf, "</ol>"); continue; } // Regular paragraph buffer_append(buf, "<p>"); while (1) { const char *content = skip_whitespace(line); process_inline(buf, content, strlen(content)); free(line); if (*ptr == '\n') ptr++; // Check next line - continue paragraph if not special if (!*ptr) break; line_start = ptr; while (*ptr && *ptr != '\n') ptr++; line_len = ptr - line_start; line = (char *)malloc(line_len + 1); if (!line) break; memcpy(line, line_start, line_len); line[line_len] = '\0'; if (is_empty_line(line) || count_heading_level(line) > 0 || starts_with(line, "```") || starts_with(line, ">") || is_horizontal_rule(line) || is_unordered_list(line) || is_ordered_list(line)) { ptr = line_start; free(line); break; } buffer_append_char(buf, ' '); } buffer_append(buf, "</p>"); } char *result = buf->data; free(buf); // Free struct but not data return result; } // Free the returned HTML string MDAPI void markdown_free(char *html) { free(html); } // Get length of HTML string (for WASM memory allocation) MDAPI size_t markdown_get_length(const char *html) { return html ? strlen(html) : 0; }