Mercurial
view markdown_converter/wasm/markdown_to_html_wasm.c @ 176:fed99fc04e12 hg-web
[HgWeb] Problem with the emscript lol
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Wed, 21 Jan 2026 19:32:08 -0800 |
| parents | cd35e600ae34 |
| children |
line wrap: on
line source
/**
 * Markdown to HTML Converter - Standalone WASM Implementation
 *
 * No libc dependencies - can be compiled with: clang --target=wasm32
 *
 * Memory model: every allocation comes from a fixed-size bump heap and
 * free() is a no-op, so memory is only reclaimed by heap_reset(). The
 * string returned by markdown_to_html() lives in that heap and is
 * invalidated by the next heap_reset().
 */

#define WASM_EXPORT __attribute__((visibility("default")))

typedef unsigned long size_t;
typedef int int32_t;

/* ------------------------------------------------------------------ */
/* Simple bump allocator for WASM                                      */
/* ------------------------------------------------------------------ */

#define HEAP_SIZE (1024 * 1024) // 1MB heap

static char heap[HEAP_SIZE];
static size_t heap_offset = 0;

/**
 * Allocate `size` bytes from the bump heap, aligned to 8 bytes.
 *
 * @return pointer to the new region, or 0 when the heap cannot satisfy
 *         the request.
 */
WASM_EXPORT void *malloc(size_t size) {
    size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;

    // Compare against the remaining space instead of computing
    // aligned_offset + size, which wraps around for very large sizes.
    if (aligned_offset > (size_t)HEAP_SIZE) return 0;
    if (size > (size_t)HEAP_SIZE - aligned_offset) return 0;

    void *ptr = &heap[aligned_offset];
    heap_offset = aligned_offset + size;
    return ptr;
}

/** No-op: a bump allocator cannot release individual allocations. */
WASM_EXPORT void free(void *ptr) {
    (void)ptr;
}

/** Discard every allocation made so far by rewinding the heap to empty. */
WASM_EXPORT void heap_reset(void) {
    heap_offset = 0;
}

/* ------------------------------------------------------------------ */
/* Minimal string / character helpers (libc replacements)              */
/* ------------------------------------------------------------------ */

/** Length of the NUL-terminated string `s`, excluding the terminator. */
static size_t strlen(const char *s) {
    size_t len = 0;
    while (s[len]) len++;
    return len;
}

/** Copy `n` bytes from `src` to `dest` (regions must not overlap). */
static void *memcpy(void *dest, const void *src, size_t n) {
    char *d = (char *)dest;
    const char *s = (const char *)src;
    while (n--) *d++ = *s++;
    return dest;
}

/** Non-zero when `c` is space, tab, newline, CR, form feed or vertical tab. */
static int isspace_c(int c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v';
}

/** Non-zero when `c` is an ASCII decimal digit. */
static int isdigit_c(int c) {
    return c >= '0' && c <= '9';
}

/* ------------------------------------------------------------------ */
/* String buffer for building HTML output                              */
/* ------------------------------------------------------------------ */

/* Growable, always NUL-terminated character buffer. */
typedef struct {
    char *data;      /* NUL-terminated contents */
    size_t length;   /* number of bytes before the terminator */
    size_t capacity; /* bytes allocated for data */
} StringBuffer;

/**
 * Create an empty buffer with room for `initial_capacity` bytes.
 *
 * @return the new buffer, or 0 when allocation fails.
 */
static StringBuffer *buffer_create(size_t initial_capacity) {
    // At least one byte is needed for the terminator written below.
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = malloc(sizeof *buf);
    if (!buf) return 0;

    buf->data = malloc(initial_capacity);
    if (!buf->data) return 0;

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

/**
 * Ensure the buffer can take `needed` more bytes plus the terminator.
 * Capacity doubles until the request fits; the old storage is abandoned
 * because the bump allocator cannot free it.
 *
 * @return 1 when the space is available, 0 when it is not (the buffer
 *         is left untouched and the caller must not write).
 */
static int buffer_grow(StringBuffer *buf, size_t needed) {
    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    // Nothing larger than the heap can ever be allocated; bailing out
    // here also keeps the doubling loop below from overflowing.
    if (required > (size_t)HEAP_SIZE) return 0;

    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) new_capacity *= 2;

    char *new_data = malloc(new_capacity);
    if (!new_data) return 0;

    memcpy(new_data, buf->data, buf->length + 1);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

/** Append the NUL-terminated string `str`; dropped if memory runs out. */
static void buffer_append(StringBuffer *buf, const char *str) {
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    memcpy(buf->data + buf->length, str, len + 1);
    buf->length += len;
}

/** Append exactly `n` bytes from `str`; dropped if memory runs out. */
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n) {
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

/** Append the single character `c`; dropped if memory runs out. */
static void buffer_append_char(StringBuffer *buf, char c) {
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

/**
 * Append `n` bytes of `str` with HTML-significant characters replaced
 * by entities. `<`, `>` and `&` are always escaped; `"` is escaped only
 * when `in_attribute` is non-zero (text placed inside a quoted attribute).
 */
static void buffer_append_escaped(StringBuffer *buf, const char *str, size_t n,
                                  int in_attribute) {
    for (size_t i = 0; i < n; i++) {
        char c = str[i];
        if (c == '<') {
            buffer_append(buf, "&lt;");
        } else if (c == '>') {
            buffer_append(buf, "&gt;");
        } else if (c == '&') {
            buffer_append(buf, "&amp;");
        } else if (c == '"' && in_attribute) {
            buffer_append(buf, "&quot;");
        } else {
            buffer_append_char(buf, c);
        }
    }
}

/* ------------------------------------------------------------------ */
/* Line classification                                                 */
/* ------------------------------------------------------------------ */

// Check if line starts with pattern (after trimming whitespace)
static int starts_with(const char *line, const char *pattern) {
    while (*line && isspace_c(*line)) line++;
    size_t plen = strlen(pattern);
    for (size_t i = 0; i < plen; i++) {
        // A short line stops here too: its '\0' never equals pattern[i].
        if (line[i] != pattern[i]) return 0;
    }
    return 1;
}

// Count leading # characters; returns 1..6 for a heading, 0 otherwise.
// The run of '#' must be followed by a space.
static int count_heading_level(const char *line) {
    int count = 0;
    while (*line && isspace_c(*line)) line++;
    while (line[count] == '#' && count < 6) count++;
    if (count > 0 && line[count] == ' ') return count;
    return 0;
}

// Skip whitespace
static const char *skip_whitespace(const char *str) {
    while (*str && isspace_c(*str)) str++;
    return str;
}

// Check if line is empty (contains nothing but whitespace)
static int is_empty_line(const char *line) {
    while (*line) {
        if (!isspace_c(*line)) return 0;
        line++;
    }
    return 1;
}

// Check if line is horizontal rule: three or more of the same marker
// ('-', '*' or '_'), optionally separated by whitespace, and nothing else.
static int is_horizontal_rule(const char *line) {
    line = skip_whitespace(line);
    char first = *line;
    if (first != '-' && first != '*' && first != '_') return 0;

    int count = 0;
    while (*line) {
        if (*line == first) count++;
        else if (!isspace_c(*line)) return 0;
        line++;
    }
    return count >= 3;
}

// Check if line is unordered list item ("- ", "* " or "+ ")
static int is_unordered_list(const char *line) {
    line = skip_whitespace(line);
    return (*line == '-' || *line == '*' || *line == '+') && line[1] == ' ';
}

// Check if line is ordered list item: one or more digits, then ". "
static int is_ordered_list(const char *line) {
    line = skip_whitespace(line);
    // Require at least one digit so that ". text" is not taken for a list.
    if (!isdigit_c(*line)) return 0;
    while (isdigit_c(*line)) line++;
    return *line == '.' && line[1] == ' ';
}

// Check if line is a table row (starts with |)
static int is_table_row(const char *line) {
    line = skip_whitespace(line);
    return *line == '|';
}

// Check if line is a table separator row (| --- | --- |)
static int is_table_separator(const char *line) {
    line = skip_whitespace(line);
    if (*line != '|') return 0;
    line++;

    int has_dash = 0;
    while (*line) {
        if (*line == '-') has_dash = 1;
        else if (*line == '|' || *line == ':' || isspace_c(*line)) { /* ok */ }
        else return 0;
        line++;
    }
    return has_dash;
}

/* ------------------------------------------------------------------ */
/* Inline and table rendering                                          */
/* ------------------------------------------------------------------ */

// Forward declaration for process_inline
static void process_inline(StringBuffer *buf, const char *text, size_t len);

// Parse table cells from a row and append to buffer.
// `is_header` selects <th> cells instead of <td>.
static void parse_table_row(StringBuffer *buf, const char *line, int is_header) {
    const char *cell_tag = is_header ? "th" : "td";

    buffer_append(buf, "<tr>");
    line = skip_whitespace(line);
    if (*line == '|') line++; // Skip leading |

    while (*line) {
        // Skip whitespace before cell content
        while (*line && isspace_c(*line)) line++;

        // Find cell end (next | or end of line)
        const char *cell_start = line;
        while (*line && *line != '|') line++;

        // Trim trailing whitespace from cell
        const char *cell_end = line;
        while (cell_end > cell_start && isspace_c(*(cell_end - 1))) cell_end--;
        size_t cell_len = cell_end - cell_start;

        // Only output cell if we have content or more cells coming
        if (cell_len > 0 || *line == '|') {
            buffer_append(buf, "<");
            buffer_append(buf, cell_tag);
            buffer_append(buf, ">");
            if (cell_len > 0) {
                process_inline(buf, cell_start, cell_len);
            }
            buffer_append(buf, "</");
            buffer_append(buf, cell_tag);
            buffer_append(buf, ">");
        }

        if (*line == '|') line++; // Skip |

        // Check if this was the trailing |
        const char *rest = line;
        while (*rest && isspace_c(*rest)) rest++;
        if (!*rest) break; // End of line after trailing |
    }
    buffer_append(buf, "</tr>");
}

/**
 * Match "[label](target)" where text[open] is the '['.
 *
 * On success the label is text[open + 1 .. *label_end) and the target
 * is text[*label_end + 2 .. *target_end).
 *
 * @return 1 on a match, 0 otherwise (outputs are then left unset).
 */
static int match_bracket_paren(const char *text, size_t len, size_t open,
                               size_t *label_end, size_t *target_end) {
    size_t close = open + 1;
    while (close < len && text[close] != ']') close++;
    if (close + 1 >= len || text[close + 1] != '(') return 0;

    size_t paren = close + 2;
    while (paren < len && text[paren] != ')') paren++;
    if (paren >= len) return 0;

    *label_end = close;
    *target_end = paren;
    return 1;
}

/**
 * Render the inline markdown in text[0 .. len) into `buf`.
 *
 * Handles links, images, bold, strikethrough, italic and inline code;
 * every other character is HTML-escaped. Bold, strikethrough and italic
 * contents are rendered recursively. Link text, URLs, image alt text and
 * code spans are escaped but not parsed further.
 */
static void process_inline(StringBuffer *buf, const char *text, size_t len) {
    size_t i = 0;

    while (i < len) {
        // Links: [text](url)
        if (text[i] == '[') {
            size_t label_end, url_end;
            if (match_bracket_paren(text, len, i, &label_end, &url_end)) {
                size_t url_start = label_end + 2;
                buffer_append(buf, "<a href=\"");
                buffer_append_escaped(buf, text + url_start, url_end - url_start, 1);
                buffer_append(buf, "\">");
                buffer_append_escaped(buf, text + i + 1, label_end - (i + 1), 0);
                buffer_append(buf, "</a>");
                i = url_end + 1;
                continue;
            }
        }

        // Images: ![alt](url)
        if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
            size_t alt_end, url_end;
            if (match_bracket_paren(text, len, i + 1, &alt_end, &url_end)) {
                size_t alt_start = i + 2;
                size_t url_start = alt_end + 2;
                buffer_append(buf, "<img src=\"");
                buffer_append_escaped(buf, text + url_start, url_end - url_start, 1);
                buffer_append(buf, "\" alt=\"");
                buffer_append_escaped(buf, text + alt_start, alt_end - alt_start, 1);
                buffer_append(buf, "\">");
                i = url_end + 1;
                continue;
            }
        }

        // Bold: **text** or __text__
        if ((text[i] == '*' || text[i] == '_') && i + 1 < len && text[i + 1] == text[i]) {
            char marker = text[i];
            size_t start = i + 2;
            size_t end = start;
            while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
            if (end + 1 < len) {
                buffer_append(buf, "<strong>");
                process_inline(buf, text + start, end - start);
                buffer_append(buf, "</strong>");
                i = end + 2;
                continue;
            }
        }

        // Strikethrough: ~~text~~
        if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
            size_t start = i + 2;
            size_t end = start;
            while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
            if (end + 1 < len) {
                buffer_append(buf, "<del>");
                process_inline(buf, text + start, end - start);
                buffer_append(buf, "</del>");
                i = end + 2;
                continue;
            }
        }

        // Italic: *text* or _text_ (the marker must not be followed by whitespace)
        if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
            char marker = text[i];
            size_t start = i + 1;
            size_t end = start;
            while (end < len && text[end] != marker) end++;
            if (end < len && end > start) {
                buffer_append(buf, "<em>");
                process_inline(buf, text + start, end - start);
                buffer_append(buf, "</em>");
                i = end + 1;
                continue;
            }
        }

        // Inline code: `code` (contents are escaped, never parsed)
        if (text[i] == '`') {
            size_t start = i + 1;
            size_t end = start;
            while (end < len && text[end] != '`') end++;
            if (end < len) {
                buffer_append(buf, "<code>");
                buffer_append_escaped(buf, text + start, end - start, 0);
                buffer_append(buf, "</code>");
                i = end + 1;
                continue;
            }
        }

        // Plain character: HTML escape
        buffer_append_escaped(buf, text + i, 1, 0);
        i++;
    }
}

// Append heading tag: <hN> when closing is 0, </hN> otherwise
static void append_heading_tag(StringBuffer *buf, int level, int closing) {
    buffer_append_char(buf, '<');
    if (closing) buffer_append_char(buf, '/');
    buffer_append_char(buf, 'h');
    buffer_append_char(buf, (char)('0' + level));
    buffer_append_char(buf, '>');
}

/* ------------------------------------------------------------------ */
/* Block rendering                                                     */
/* ------------------------------------------------------------------ */

/**
 * Copy the line starting at *cursor into a fresh NUL-terminated string
 * and advance *cursor to the line's terminating '\n' (or to the final
 * '\0'). A trailing '\r' is dropped so CRLF input does not leak carriage
 * returns into the output.
 *
 * @return the copy, or 0 when allocation fails (*cursor is still advanced).
 */
static char *read_line(const char **cursor) {
    const char *start = *cursor;
    const char *end = start;
    while (*end && *end != '\n') end++;
    *cursor = end;

    size_t len = end - start;
    if (len > 0 && start[len - 1] == '\r') len--;

    char *copy = malloc(len + 1);
    if (!copy) return 0;
    memcpy(copy, start, len);
    copy[len] = '\0';
    return copy;
}

/* Render a fenced code block; *cursor sits at the end of the opening fence line. */
static void emit_code_block(StringBuffer *buf, const char **cursor) {
    const char *ptr = *cursor;

    buffer_append(buf, "<pre><code>");
    if (*ptr == '\n') ptr++;

    while (*ptr) {
        char *code_line = read_line(&ptr);
        if (!code_line) break;

        if (starts_with(code_line, "```")) {
            if (*ptr == '\n') ptr++;
            break;
        }

        buffer_append_escaped(buf, code_line, strlen(code_line), 0);
        buffer_append_char(buf, '\n');
        if (*ptr == '\n') ptr++;
    }

    buffer_append(buf, "</code></pre>");
    *cursor = ptr;
}

/* Render consecutive '>' lines as one <blockquote>; `line` is the first of them. */
static void emit_blockquote(StringBuffer *buf, const char **cursor, const char *line) {
    const char *ptr = *cursor;

    buffer_append(buf, "<blockquote>");
    for (;;) {
        const char *content = skip_whitespace(line);
        if (*content == '>') content++;
        content = skip_whitespace(content);
        process_inline(buf, content, strlen(content));
        buffer_append_char(buf, ' ');

        if (*ptr == '\n') ptr++;
        if (!*ptr) break;

        const char *line_start = ptr;
        line = read_line(&ptr);
        if (!line) break;
        if (!starts_with(line, ">")) {
            ptr = line_start; // not ours: let the main loop handle it
            break;
        }
    }
    buffer_append(buf, "</blockquote>");
    *cursor = ptr;
}

/* Render consecutive list items as <ul> or (when `ordered`) <ol>; `line` is the first item. */
static void emit_list(StringBuffer *buf, const char **cursor, const char *line, int ordered) {
    const char *ptr = *cursor;

    buffer_append(buf, ordered ? "<ol>" : "<ul>");
    for (;;) {
        const char *content = skip_whitespace(line);
        if (ordered) {
            while (*content && isdigit_c(*content)) content++;
            if (*content == '.') content++;
            content = skip_whitespace(content);
        } else {
            content += 2; // marker character plus the space after it
        }

        buffer_append(buf, "<li>");
        process_inline(buf, content, strlen(content));
        buffer_append(buf, "</li>");

        if (*ptr == '\n') ptr++;
        if (!*ptr) break;

        const char *line_start = ptr;
        line = read_line(&ptr);
        if (!line) break;
        if (!(ordered ? is_ordered_list(line) : is_unordered_list(line))) {
            ptr = line_start; // not ours: let the main loop handle it
            break;
        }
    }
    buffer_append(buf, ordered ? "</ol>" : "</ul>");
    *cursor = ptr;
}

/**
 * Render a table whose header row is `header_line`, provided the next
 * line is a separator row.
 *
 * @return 1 when a table was emitted and *cursor advanced past it,
 *         0 when the next line is not a separator (nothing is emitted
 *         and *cursor is unchanged).
 */
static int emit_table(StringBuffer *buf, const char **cursor, const char *header_line) {
    // Check if next line is a separator (to confirm this is a table)
    const char *peek = *cursor;
    if (*peek == '\n') peek++;
    char *separator = read_line(&peek);
    if (!separator || !is_table_separator(separator)) return 0;

    buffer_append(buf, "<table>");
    buffer_append(buf, "<thead>");
    parse_table_row(buf, header_line, 1);
    buffer_append(buf, "</thead>");

    // Continue after the separator row
    const char *ptr = peek;
    if (*ptr == '\n') ptr++;

    buffer_append(buf, "<tbody>");
    while (*ptr) {
        const char *row_start = ptr;
        char *row = read_line(&ptr);
        if (!row) break;
        if (!is_table_row(row) || is_empty_line(row)) {
            ptr = row_start; // not ours: let the main loop handle it
            break;
        }
        parse_table_row(buf, row, 0);
        if (*ptr == '\n') ptr++;
    }
    buffer_append(buf, "</tbody>");
    buffer_append(buf, "</table>");

    *cursor = ptr;
    return 1;
}

/* Render `line` and the plain lines after it as one <p>, joined by single spaces. */
static void emit_paragraph(StringBuffer *buf, const char **cursor, const char *line) {
    const char *ptr = *cursor;

    buffer_append(buf, "<p>");
    for (;;) {
        const char *content = skip_whitespace(line);
        process_inline(buf, content, strlen(content));

        if (*ptr == '\n') ptr++;
        if (!*ptr) break;

        const char *line_start = ptr;
        line = read_line(&ptr);
        if (!line) break;

        // Any line that opens another block ends the paragraph.
        if (is_empty_line(line) || count_heading_level(line) > 0 ||
            starts_with(line, "```") || starts_with(line, ">") ||
            is_horizontal_rule(line) || is_unordered_list(line) ||
            is_ordered_list(line) || is_table_row(line)) {
            ptr = line_start;
            break;
        }
        buffer_append_char(buf, ' ');
    }
    buffer_append(buf, "</p>");
    *cursor = ptr;
}

/**
 * Convert markdown to HTML.
 *
 * Recognised blocks, tested in this order: headings, fenced code blocks,
 * blockquotes, horizontal rules, unordered lists, ordered lists, tables
 * and paragraphs. Blank lines are skipped.
 *
 * @param markdown NUL-terminated markdown text; may be 0.
 * @return NUL-terminated HTML allocated from the bump heap, or 0 when
 *         `markdown` is 0 or the output buffer cannot be created. If the
 *         heap runs out part-way through, the HTML produced so far is
 *         returned (output that does not fit is dropped).
 */
WASM_EXPORT char *markdown_to_html(const char *markdown) {
    if (!markdown) return 0;

    StringBuffer *buf = buffer_create(4096);
    if (!buf) return 0;

    const char *ptr = markdown;

    while (*ptr) {
        // After this call ptr rests on the line's '\n' or the final '\0'.
        char *line = read_line(&ptr);
        if (!line) return buf->data;

        // Skip empty lines
        if (is_empty_line(line)) {
            if (*ptr == '\n') ptr++;
            continue;
        }

        // Headings
        int heading_level = count_heading_level(line);
        if (heading_level > 0) {
            const char *content = skip_whitespace(line);
            while (*content == '#') content++;
            content = skip_whitespace(content);

            append_heading_tag(buf, heading_level, 0);
            process_inline(buf, content, strlen(content));
            append_heading_tag(buf, heading_level, 1);

            if (*ptr == '\n') ptr++;
            continue;
        }

        // Code block
        if (starts_with(line, "```")) {
            emit_code_block(buf, &ptr);
            continue;
        }

        // Blockquote
        if (starts_with(line, ">")) {
            emit_blockquote(buf, &ptr, line);
            continue;
        }

        // Horizontal rule
        if (is_horizontal_rule(line)) {
            buffer_append(buf, "<hr>");
            if (*ptr == '\n') ptr++;
            continue;
        }

        // Unordered list
        if (is_unordered_list(line)) {
            emit_list(buf, &ptr, line, 0);
            continue;
        }

        // Ordered list
        if (is_ordered_list(line)) {
            emit_list(buf, &ptr, line, 1);
            continue;
        }

        // Table (a '|' row without a separator below it falls through
        // and is rendered as a paragraph)
        if (is_table_row(line) && emit_table(buf, &ptr, line)) {
            continue;
        }

        // Paragraph
        emit_paragraph(buf, &ptr, line);
    }

    return buf->data;
}

// Get string length (for JS interop); returns 0 for a null pointer
WASM_EXPORT size_t markdown_strlen(const char *str) {
    return str ? strlen(str) : 0;
}