Mercurial
diff markdown_converter/wasm/markdown_to_html_wasm.c @ 173:827c6ac504cd hg-web
Merged in default here.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Mon, 19 Jan 2026 18:59:10 -0800 |
| parents | cd35e600ae34 |
| children |
line wrap: on
line diff
/**
 * Markdown to HTML Converter - Standalone WASM Implementation
 * No libc dependencies - can be compiled with: clang --target=wasm32
 *
 * Memory model: every allocation comes from a fixed-size bump heap. Nothing
 * is reclaimed until heap_reset() is called, which invalidates every pointer
 * previously returned by malloc() or markdown_to_html().
 */

#define WASM_EXPORT __attribute__((visibility("default")))

typedef unsigned long size_t;
typedef int int32_t;

// Simple bump allocator for WASM
#define HEAP_SIZE (1024 * 1024) // 1MB heap
// The arena itself is forced to 8-byte alignment so that 8-byte aligned
// offsets really produce 8-byte aligned pointers.
static char heap[HEAP_SIZE] __attribute__((aligned(8)));
static size_t heap_offset = 0;

// Allocate `size` bytes from the bump heap.
// Returns an 8-byte aligned pointer, or 0 when the heap cannot hold the
// request. The bounds check is written so it cannot wrap for huge sizes.
WASM_EXPORT void *malloc(size_t size)
{
    // Align to 8 bytes
    size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;
    if (aligned_offset > (size_t)HEAP_SIZE) return 0;
    if (size > (size_t)HEAP_SIZE - aligned_offset) return 0;

    void *ptr = &heap[aligned_offset];
    heap_offset = aligned_offset + size;
    return ptr;
}

// Present for API compatibility only: a bump allocator cannot release
// individual blocks. Use heap_reset() to reclaim memory.
WASM_EXPORT void free(void *ptr)
{
    (void)ptr;
}

// Discard every allocation at once by rewinding the bump pointer.
WASM_EXPORT void heap_reset(void)
{
    heap_offset = 0;
}

// String functions

// Length of a NUL-terminated string, excluding the terminator.
static size_t strlen(const char *s)
{
    size_t len = 0;
    while (s[len]) len++;
    return len;
}

// Copy n bytes from src to dest (regions must not overlap). Returns dest.
static void *memcpy(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;
    while (n--) *d++ = *s++;
    return dest;
}

static int isspace_c(int c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v';
}

static int isdigit_c(int c)
{
    return c >= '0' && c <= '9';
}

// String buffer for building HTML output.
// Invariant: data[length] is always '\0' and length + 1 <= capacity.
typedef struct {
    char *data;
    size_t length;
    size_t capacity;
    int truncated; // set once an append had to be dropped for lack of memory
} StringBuffer;

// Create an empty buffer with room for at least one byte.
// Returns 0 when the heap is exhausted.
static StringBuffer *buffer_create(size_t initial_capacity)
{
    // A zero capacity would leave no room for the terminator.
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = malloc(sizeof *buf);
    if (!buf) return 0;

    buf->data = malloc(initial_capacity);
    if (!buf->data) return 0;

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    buf->truncated = 0;
    return buf;
}

// Make sure `needed` more characters plus the terminator fit in the buffer.
// Returns 1 when they do, 0 when memory ran out. After a failure the buffer
// is marked truncated and every later append is dropped, so the output is a
// clean, NUL-terminated prefix rather than a heap overflow.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    if (buf->truncated) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    // Double until large enough; stop once the size can no longer fit the heap.
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required && new_capacity <= (size_t)HEAP_SIZE)
        new_capacity *= 2;
    if (new_capacity < required) new_capacity = required;

    char *new_data = malloc(new_capacity);
    if (!new_data && new_capacity > required) {
        // The doubled size did not fit; an exact fit still might.
        new_capacity = required;
        new_data = malloc(new_capacity);
    }
    if (!new_data) {
        buf->truncated = 1;
        return 0;
    }

    memcpy(new_data, buf->data, buf->length + 1);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append a NUL-terminated string.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    memcpy(buf->data + buf->length, str, len + 1);
    buf->length += len;
}

// Append exactly n bytes of str.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append a single character.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

// Append n bytes of str with HTML-significant characters replaced by
// entities: '<', '>' and '&' always, '"' only when in_attribute is non-zero
// (the text is being written inside a double-quoted attribute value).
static void buffer_append_escaped(StringBuffer *buf, const char *str, size_t n, int in_attribute)
{
    size_t run_start = 0; // start of the pending run of ordinary characters

    for (size_t i = 0; i < n; i++) {
        const char *entity = 0;
        switch (str[i]) {
        case '<': entity = "&lt;"; break;
        case '>': entity = "&gt;"; break;
        case '&': entity = "&amp;"; break;
        case '"': if (in_attribute) entity = "&quot;"; break;
        default: break;
        }
        if (!entity) continue;

        buffer_append_n(buf, str + run_start, i - run_start);
        buffer_append(buf, entity);
        run_start = i + 1;
    }
    buffer_append_n(buf, str + run_start, n - run_start);
}

// Append "<name>" or, when closing is non-zero, "</name>".
static void append_tag(StringBuffer *buf, const char *name, int closing)
{
    buffer_append(buf, closing ? "</" : "<");
    buffer_append(buf, name);
    buffer_append(buf, ">");
}

// Check if line starts with pattern (after trimming whitespace)
static int starts_with(const char *line, const char *pattern)
{
    while (*line && isspace_c(*line)) line++;
    size_t plen = strlen(pattern);
    for (size_t i = 0; i < plen; i++) {
        // A NUL in line never equals a pattern character, so this cannot
        // read past the end of line.
        if (line[i] != pattern[i]) return 0;
    }
    return 1;
}

// Count leading # characters. Returns 1..6 for a heading ("#"s followed by a
// space), 0 otherwise.
static int count_heading_level(const char *line)
{
    int count = 0;
    while (*line && isspace_c(*line)) line++;
    while (line[count] == '#' && count < 6) count++;
    if (count > 0 && line[count] == ' ') return count;
    return 0;
}

// Skip whitespace
static const char *skip_whitespace(const char *str)
{
    while (*str && isspace_c(*str)) str++;
    return str;
}

// Check if line is empty (only whitespace)
static int is_empty_line(const char *line)
{
    while (*line) {
        if (!isspace_c(*line)) return 0;
        line++;
    }
    return 1;
}

// Check if line is horizontal rule: three or more of the same '-', '*' or
// '_' character, optionally separated by whitespace, and nothing else.
static int is_horizontal_rule(const char *line)
{
    line = skip_whitespace(line);
    char first = *line;
    if (first != '-' && first != '*' && first != '_') return 0;

    int count = 0;
    while (*line) {
        if (*line == first) count++;
        else if (!isspace_c(*line)) return 0;
        line++;
    }
    return count >= 3;
}

// Check if line is unordered list item ("- ", "* " or "+ ")
static int is_unordered_list(const char *line)
{
    line = skip_whitespace(line);
    return (*line == '-' || *line == '*' || *line == '+') && line[1] == ' ';
}

// Check if line is ordered list item: one or more digits, then ". "
static int is_ordered_list(const char *line)
{
    line = skip_whitespace(line);
    // Require at least one digit so that ". text" is not taken for a list.
    if (!isdigit_c(*line)) return 0;
    while (isdigit_c(*line)) line++;
    return *line == '.' && line[1] == ' ';
}

// Check if line is a table row (starts with |)
static int is_table_row(const char *line)
{
    line = skip_whitespace(line);
    return *line == '|';
}

// Check if line is a table separator row (| --- | --- |)
static int is_table_separator(const char *line)
{
    line = skip_whitespace(line);
    if (*line != '|') return 0;
    line++;

    int has_dash = 0;
    while (*line) {
        if (*line == '-') has_dash = 1;
        else if (*line == '|' || *line == ':' || isspace_c(*line)) { /* ok */ }
        else return 0;
        line++;
    }
    return has_dash;
}

// Forward declaration for process_inline
static void process_inline(StringBuffer *buf, const char *text, size_t len);

// Parse table cells from a row and append "<tr>...</tr>" to buffer.
// Cells are emitted as <th> when is_header is non-zero, <td> otherwise.
static void parse_table_row(StringBuffer *buf, const char *line, int is_header)
{
    const char *cell_tag = is_header ? "th" : "td";

    buffer_append(buf, "<tr>");

    line = skip_whitespace(line);
    if (*line == '|') line++; // Skip leading |

    while (*line) {
        // Skip whitespace before cell content
        while (*line && isspace_c(*line)) line++;

        // Find cell end (next | or end of line)
        const char *cell_start = line;
        while (*line && *line != '|') line++;

        // Trim trailing whitespace from cell
        const char *cell_end = line;
        while (cell_end > cell_start && isspace_c(*(cell_end - 1))) cell_end--;

        size_t cell_len = (size_t)(cell_end - cell_start);

        // Only output cell if we have content or more cells coming
        if (cell_len > 0 || *line == '|') {
            append_tag(buf, cell_tag, 0);
            if (cell_len > 0) {
                process_inline(buf, cell_start, cell_len);
            }
            append_tag(buf, cell_tag, 1);
        }

        if (*line == '|') line++; // Skip |

        // Stop when only whitespace follows (this was the trailing |)
        if (is_empty_line(line)) break;
    }

    buffer_append(buf, "</tr>");
}

// Parse the "label](url)" tail of a link or image, where the label begins at
// text[label_start]. On success returns 1 and stores the index of ']' in
// *label_end, the index of the first URL character in *url_start and the
// index of ')' in *url_end. Returns 0 when the text does not have this shape.
static int parse_link_target(const char *text, size_t len, size_t label_start,
                             size_t *label_end, size_t *url_start, size_t *url_end)
{
    size_t close = label_start;
    while (close < len && text[close] != ']') close++;
    if (close + 1 >= len || text[close + 1] != '(') return 0;

    size_t paren = close + 2;
    while (paren < len && text[paren] != ')') paren++;
    if (paren >= len) return 0;

    *label_end = close;
    *url_start = close + 2;
    *url_end = paren;
    return 1;
}

// Process inline markdown in text[0..len) and append the HTML to buf.
// Handles links, images, bold, strikethrough, italic and inline code; all
// other characters are HTML-escaped.
static void process_inline(StringBuffer *buf, const char *text, size_t len)
{
    size_t i = 0;

    while (i < len) {
        char c = text[i];
        size_t label_end = 0;
        size_t url_start = 0;
        size_t url_end = 0;

        // Links: [text](url)
        if (c == '[' &&
            parse_link_target(text, len, i + 1, &label_end, &url_start, &url_end)) {
            buffer_append(buf, "<a href=\"");
            buffer_append_escaped(buf, text + url_start, url_end - url_start, 1);
            buffer_append(buf, "\">");
            buffer_append_escaped(buf, text + i + 1, label_end - (i + 1), 0);
            buffer_append(buf, "</a>");
            i = url_end + 1;
            continue;
        }

        // Images: ![alt](url)
        if (c == '!' && i + 1 < len && text[i + 1] == '[' &&
            parse_link_target(text, len, i + 2, &label_end, &url_start, &url_end)) {
            buffer_append(buf, "<img src=\"");
            buffer_append_escaped(buf, text + url_start, url_end - url_start, 1);
            buffer_append(buf, "\" alt=\"");
            buffer_append_escaped(buf, text + i + 2, label_end - (i + 2), 1);
            buffer_append(buf, "\">");
            i = url_end + 1;
            continue;
        }

        // Bold (**text** or __text__) and strikethrough (~~text~~)
        if ((c == '*' || c == '_' || c == '~') && i + 1 < len && text[i + 1] == c) {
            size_t start = i + 2;
            size_t end = start;
            while (end + 1 < len && !(text[end] == c && text[end + 1] == c)) end++;

            if (end + 1 < len) {
                const char *tag = (c == '~') ? "del" : "strong";
                append_tag(buf, tag, 0);
                process_inline(buf, text + start, end - start);
                append_tag(buf, tag, 1);
                i = end + 2;
                continue;
            }
        }

        // Italic: *text* or _text_
        if ((c == '*' || c == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
            size_t start = i + 1;
            size_t end = start;
            while (end < len && text[end] != c) end++;

            if (end < len && end > start) {
                buffer_append(buf, "<em>");
                process_inline(buf, text + start, end - start);
                buffer_append(buf, "</em>");
                i = end + 1;
                continue;
            }
        }

        // Inline code: `code` (content is escaped but not parsed further)
        if (c == '`') {
            size_t start = i + 1;
            size_t end = start;
            while (end < len && text[end] != '`') end++;

            if (end < len) {
                buffer_append(buf, "<code>");
                buffer_append_escaped(buf, text + start, end - start, 0);
                buffer_append(buf, "</code>");
                i = end + 1;
                continue;
            }
        }

        // Plain character: HTML escape
        buffer_append_escaped(buf, text + i, 1, 0);
        i++;
    }
}

// Append heading tag "<hN>" or "</hN>" for level N in 1..6
static void append_heading_tag(StringBuffer *buf, int level, int closing)
{
    buffer_append_char(buf, '<');
    if (closing) buffer_append_char(buf, '/');
    buffer_append_char(buf, 'h');
    buffer_append_char(buf, (char)('0' + level));
    buffer_append_char(buf, '>');
}

// Length of the longest line in text, not counting line terminators.
static size_t longest_line_length(const char *text)
{
    size_t longest = 0;
    size_t current = 0;

    for (; *text; text++) {
        if (*text == '\n') {
            if (current > longest) longest = current;
            current = 0;
        } else {
            current++;
        }
    }
    return current > longest ? current : longest;
}

// Copy the line beginning at start (up to, not including, '\n' or NUL) into
// dst and NUL-terminate it. dst must hold the line plus one byte.
// Returns the line length, so start + result is the line's terminator.
static size_t read_line(const char *start, char *dst)
{
    size_t len = 0;
    while (start[len] && start[len] != '\n') len++;
    memcpy(dst, start, len);
    dst[len] = '\0';
    return len;
}

// Step over one '\n' if the cursor is on it.
static const char *skip_newline(const char *ptr)
{
    return (*ptr == '\n') ? ptr + 1 : ptr;
}

/*
 * Block renderers. Each is entered with `line` holding a NUL-terminated copy
 * of the block's first line and `ptr` on that line's terminator ('\n' or
 * NUL). `line` is a scratch buffer the renderer may overwrite with later
 * lines. Each returns the position at which the main loop should resume.
 */

// Fenced code block: everything up to the closing ``` is escaped verbatim.
static const char *render_code_block(StringBuffer *buf, char *line, const char *ptr)
{
    buffer_append(buf, "<pre><code>");
    ptr = skip_newline(ptr);

    while (*ptr) {
        size_t line_len = read_line(ptr, line);
        ptr += line_len;

        if (starts_with(line, "```")) {
            ptr = skip_newline(ptr);
            break;
        }

        buffer_append_escaped(buf, line, line_len, 0);
        buffer_append_char(buf, '\n');
        ptr = skip_newline(ptr);
    }

    buffer_append(buf, "</code></pre>");
    return ptr;
}

// Blockquote: consecutive lines starting with '>', each followed by a space.
static const char *render_blockquote(StringBuffer *buf, char *line, const char *ptr)
{
    buffer_append(buf, "<blockquote>");

    while (1) {
        const char *content = skip_whitespace(line);
        if (*content == '>') content++;
        content = skip_whitespace(content);
        process_inline(buf, content, strlen(content));
        buffer_append_char(buf, ' ');

        ptr = skip_newline(ptr);
        if (!*ptr) break;

        const char *line_start = ptr;
        ptr += read_line(ptr, line);
        if (!starts_with(line, ">")) {
            ptr = line_start; // not ours: let the main loop re-read it
            break;
        }
    }

    buffer_append(buf, "</blockquote>");
    return ptr;
}

// List: consecutive items of the same kind (ordered when `ordered` is
// non-zero, unordered otherwise), each rendered as an <li>.
static const char *render_list(StringBuffer *buf, char *line, const char *ptr, int ordered)
{
    buffer_append(buf, ordered ? "<ol>" : "<ul>");

    while (1) {
        const char *content = skip_whitespace(line);
        if (ordered) {
            while (isdigit_c(*content)) content++;
            if (*content == '.') content++;
        } else {
            content++; // the bullet character
        }
        content = skip_whitespace(content);

        buffer_append(buf, "<li>");
        process_inline(buf, content, strlen(content));
        buffer_append(buf, "</li>");

        ptr = skip_newline(ptr);
        if (!*ptr) break;

        const char *line_start = ptr;
        ptr += read_line(ptr, line);
        if (!(ordered ? is_ordered_list(line) : is_unordered_list(line))) {
            ptr = line_start;
            break;
        }
    }

    buffer_append(buf, ordered ? "</ol>" : "</ul>");
    return ptr;
}

// Table: `line` holds the header row; `ptr` is the start of the first line
// after the separator row (unlike the other renderers).
static const char *render_table(StringBuffer *buf, char *line, const char *ptr)
{
    buffer_append(buf, "<table>");

    // Header row
    buffer_append(buf, "<thead>");
    parse_table_row(buf, line, 1);
    buffer_append(buf, "</thead>");

    // Body rows
    buffer_append(buf, "<tbody>");
    while (*ptr) {
        const char *line_start = ptr;
        ptr += read_line(ptr, line);

        if (!is_table_row(line)) {
            ptr = line_start;
            break;
        }

        parse_table_row(buf, line, 0);
        ptr = skip_newline(ptr);
    }
    buffer_append(buf, "</tbody>");

    buffer_append(buf, "</table>");
    return ptr;
}

// Paragraph: lines joined by single spaces until a blank line or the start
// of any other block.
static const char *render_paragraph(StringBuffer *buf, char *line, const char *ptr)
{
    buffer_append(buf, "<p>");

    while (1) {
        const char *content = skip_whitespace(line);
        process_inline(buf, content, strlen(content));

        ptr = skip_newline(ptr);
        if (!*ptr) break;

        const char *line_start = ptr;
        ptr += read_line(ptr, line);

        if (is_empty_line(line) ||
            count_heading_level(line) > 0 ||
            starts_with(line, "```") ||
            starts_with(line, ">") ||
            is_horizontal_rule(line) ||
            is_unordered_list(line) ||
            is_ordered_list(line) ||
            is_table_row(line)) {
            ptr = line_start;
            break;
        }

        buffer_append_char(buf, ' ');
    }

    buffer_append(buf, "</p>");
    return ptr;
}

// Convert markdown to HTML.
//
// markdown: NUL-terminated Markdown text ('\n' line endings).
// Returns a NUL-terminated HTML string allocated from the bump heap, or 0
// when markdown is 0 or the working buffers cannot be allocated. If the heap
// runs out while the output is being built, the HTML produced so far is
// returned (truncated but NUL-terminated). The result stays valid until
// heap_reset() is called.
WASM_EXPORT char *markdown_to_html(const char *markdown)
{
    if (!markdown) return 0;

    StringBuffer *buf = buffer_create(4096);
    if (!buf) return 0;

    // Two reusable line buffers (current line + one line of lookahead)
    // instead of one allocation per input line, which the bump allocator
    // could never give back.
    size_t scratch_size = longest_line_length(markdown) + 1;
    char *line = malloc(scratch_size);
    char *peek = malloc(scratch_size);
    if (!line || !peek) return 0;

    const char *ptr = markdown;

    while (*ptr) {
        ptr += read_line(ptr, line);

        // Skip empty lines
        if (is_empty_line(line)) {
            ptr = skip_newline(ptr);
            continue;
        }

        // Headings
        int heading_level = count_heading_level(line);
        if (heading_level > 0) {
            const char *content = skip_whitespace(line);
            while (*content == '#') content++;
            content = skip_whitespace(content);

            append_heading_tag(buf, heading_level, 0);
            process_inline(buf, content, strlen(content));
            append_heading_tag(buf, heading_level, 1);

            ptr = skip_newline(ptr);
            continue;
        }

        // Code block
        if (starts_with(line, "```")) {
            ptr = render_code_block(buf, line, ptr);
            continue;
        }

        // Blockquote
        if (starts_with(line, ">")) {
            ptr = render_blockquote(buf, line, ptr);
            continue;
        }

        // Horizontal rule (checked before lists so "- - -" is a rule)
        if (is_horizontal_rule(line)) {
            buffer_append(buf, "<hr>");
            ptr = skip_newline(ptr);
            continue;
        }

        // Unordered list
        if (is_unordered_list(line)) {
            ptr = render_list(buf, line, ptr, 0);
            continue;
        }

        // Ordered list
        if (is_ordered_list(line)) {
            ptr = render_list(buf, line, ptr, 1);
            continue;
        }

        // Table: only when the next line is a separator row; otherwise the
        // line falls through and is rendered as a paragraph.
        if (is_table_row(line)) {
            const char *next_line_start = skip_newline(ptr);
            size_t next_line_len = read_line(next_line_start, peek);

            if (is_table_separator(peek)) {
                const char *body_start = skip_newline(next_line_start + next_line_len);
                ptr = render_table(buf, line, body_start);
                continue;
            }
        }

        // Paragraph
        ptr = render_paragraph(buf, line, ptr);
    }

    return buf->data;
}

// Get string length (for JS interop). Returns 0 for a null pointer.
WASM_EXPORT size_t markdown_strlen(const char *str)
{
    return str ? strlen(str) : 0;
}