/home/mrjunejune/zenbu: markdown_converter/wasm/markdown_to_html

comparison markdown_converter/wasm/markdown_to_html_wasm.c @ 156:cd35e600ae34

[MarkDown Converter] Fixed few things and made a test

author	June Park <parkjune1995@gmail.com>
date	Mon, 12 Jan 2026 15:20:39 -0800
parents
children

comparison

equal deleted inserted replaced

-:3bb45eb67906
+:cd35e600ae34
+/**
+* Markdown to HTML Converter - Standalone WASM Implementation
+* No libc dependencies - can be compiled with: clang --target=wasm32
+*/
// Marks a symbol as visible so it can be exported from the WASM module.
#define WASM_EXPORT __attribute__((visibility("default")))

// No libc headers are available, so the basic integer types are declared here.
// The compiler-provided macros are used instead of hard-coded `unsigned long`
// / `int` so the typedefs always agree with what the compiler itself uses for
// sizeof and for its built-in strlen/memcpy/malloc prototypes on any target.
typedef __SIZE_TYPE__ size_t;
typedef __INT32_TYPE__ int32_t;
+// Simple bump allocator for WASM
+#define HEAP_SIZE (1024 * 1024)  // 1MB heap
+static char heap[HEAP_SIZE];
+static size_t heap_offset = 0;
+WASM_EXPORT void *malloc(size_t size)
+{
+// Align to 8 bytes
+size_t aligned_offset = (heap_offset + 7) & ~7;
+if (aligned_offset + size > HEAP_SIZE) return 0;
+void *ptr = &heap[aligned_offset];
+heap_offset = aligned_offset + size;
+return ptr;
+}
+WASM_EXPORT void free(void *ptr)
+{
+// Simple bump allocator - no actual free
+(void)ptr;
+}
+WASM_EXPORT void heap_reset(void)
+{
+heap_offset = 0;
+}
// String functions

// Number of bytes in `s` before its terminating NUL.
static size_t strlen(const char *s)
{
    const char *end = s;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}
// Copy `n` bytes from `src` to `dest`, front to back, and return `dest`.
static void *memcpy(void *dest, const void *src, size_t n)
{
    char *out = (char *)dest;
    const char *in = (const char *)src;
    for (size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }
    return dest;
}
// Returns 1 for space, tab, newline, carriage return, form feed and
// vertical tab; 0 for everything else.
static int isspace_c(int c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
// Returns 1 when `c` is one of the ASCII digits '0'..'9', otherwise 0.
static int isdigit_c(int c)
{
    if (c < '0') {
        return 0;
    }
    if (c > '9') {
        return 0;
    }
    return 1;
}
// String buffer for building HTML output.
//
// Invariant kept by every function below: `data` holds `length` bytes
// followed by a NUL terminator, and length + 1 <= capacity.
typedef struct {
    char  *data;      // NUL-terminated contents
    size_t length;    // bytes in use, terminator not counted
    size_t capacity;  // bytes allocated for `data`
} StringBuffer;

/**
 * Create an empty buffer with room for `initial_capacity` bytes.
 *
 * A capacity of 0 is bumped to 1 so there is always space for the
 * terminator. Returns 0 if either allocation fails.
 */
static StringBuffer *buffer_create(size_t initial_capacity)
{
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
    if (!buf) return 0;

    buf->data = (char *)malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return 0;
    }

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

/**
 * Make sure `needed` more bytes plus the terminator fit in the buffer.
 *
 * Returns 1 when the space is available and 0 when it could not be obtained.
 * On failure the buffer is left untouched, so callers must skip their write
 * rather than copy past the end of `data`.
 */
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    // length + needed + 1 must not wrap around.
    if (needed >= size_max - buf->length) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    // Double until large enough. Starting from at least 1 avoids spinning
    // forever on a zero capacity, and the size_max / 2 check avoids overflow.
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = (char *)malloc(new_capacity);
    if (!new_data && new_capacity > required) {
        // The doubled size did not fit; fall back to the exact size.
        new_capacity = required;
        new_data = (char *)malloc(new_capacity);
    }
    if (!new_data) return 0;

    memcpy(new_data, buf->data, buf->length + 1);
    free(buf->data);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append the NUL-terminated string `str`. The append is dropped if the
// buffer cannot grow.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    memcpy(buf->data + buf->length, str, len + 1);  // copies the terminator too
    buf->length += len;
}

// Append exactly `n` bytes from `str` and re-terminate. The append is
// dropped if the buffer cannot grow.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append a single character and re-terminate. The append is dropped if the
// buffer cannot grow.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}
// Returns 1 when `line`, ignoring leading whitespace, begins with `pattern`;
// otherwise 0. An empty pattern always matches.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;
    while (*cursor != '\0' && isspace_c(*cursor)) {
        cursor++;
    }

    // Walk both strings together; running out of line before pattern is a
    // mismatch because the line's NUL differs from the pattern character.
    for (; *pattern != '\0'; pattern++, cursor++) {
        if (*cursor != *pattern) {
            return 0;
        }
    }
    return 1;
}
// Heading level of `line`: the number of leading '#' characters (after
// optional whitespace, at most 6 counted) when they are followed directly by
// a space. Returns 0 when the line is not a heading.
static int count_heading_level(const char *line)
{
    const char *p = line;
    while (*p != '\0' && isspace_c(*p)) {
        p++;
    }

    int hashes = 0;
    while (hashes < 6 && p[hashes] == '#') {
        hashes++;
    }

    if (hashes == 0) {
        return 0;
    }
    return p[hashes] == ' ' ? hashes : 0;
}
// Returns a pointer to the first non-whitespace character of `str`, or to
// its terminating NUL when the string is all whitespace.
static const char *skip_whitespace(const char *str)
{
    const char *p;
    for (p = str; *p != '\0'; p++) {
        if (!isspace_c(*p)) {
            break;
        }
    }
    return p;
}
// Returns 1 when `line` contains nothing but whitespace (or nothing at all),
// otherwise 0.
static int is_empty_line(const char *line)
{
    const char *p = line;
    while (*p != '\0' && isspace_c(*p)) {
        p++;
    }
    // Reaching the terminator means no non-whitespace character was found.
    return *p == '\0';
}
// Returns 1 when `line` is a horizontal rule: after leading whitespace it
// consists only of one repeated marker ('-', '*' or '_') and whitespace,
// with the marker appearing at least three times.
static int is_horizontal_rule(const char *line)
{
    const char *p = skip_whitespace(line);
    const char marker = *p;

    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int run = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            run++;
            continue;
        }
        if (!isspace_c(*p)) {
            return 0;  // any other character disqualifies the line
        }
    }
    return run >= 3;
}
// Returns 1 when `line`, after leading whitespace, starts with a bullet
// ('-', '*' or '+') followed by a space.
static int is_unordered_list(const char *line)
{
    const char *p = skip_whitespace(line);
    switch (*p) {
    case '-':
    case '*':
    case '+':
        // Only look at the next byte once we know we are not at the NUL.
        return p[1] == ' ';
    default:
        return 0;
    }
}
// Returns 1 when `line`, after leading whitespace, starts with one or more
// digits followed by ". " (for example "12. item"), otherwise 0.
//
// At least one digit is required: a line such as ". foo" is ordinary text,
// not a list item.
static int is_ordered_list(const char *line)
{
    const char *p = skip_whitespace(line);
    const char *digits = p;

    while (*p && isdigit_c(*p)) {
        p++;
    }
    if (p == digits) {
        return 0;  // no number before the dot
    }
    return *p == '.' && p[1] == ' ';
}
// Returns 1 when the first non-whitespace character of `line` is '|'.
static int is_table_row(const char *line)
{
    const char *first = skip_whitespace(line);
    if (*first == '|') {
        return 1;
    }
    return 0;
}
// Returns 1 when `line` is a table separator row such as "| --- | :-: |":
// it starts with '|' (after whitespace), the rest contains only '-', '|',
// ':' and whitespace, and at least one '-' is present.
static int is_table_separator(const char *line)
{
    const char *p = skip_whitespace(line);
    if (*p != '|') {
        return 0;
    }

    int saw_dash = 0;
    for (p++; *p != '\0'; p++) {
        switch (*p) {
        case '-':
            saw_dash = 1;
            break;
        case '|':
        case ':':
            break;  // allowed punctuation
        default:
            if (!isspace_c(*p)) {
                return 0;
            }
            break;
        }
    }
    return saw_dash;
}
+// Forward declaration for process_inline
+static void process_inline(StringBuffer *buf, const char *text, size_t len);
+// Parse table cells from a row and append to buffer
+static void parse_table_row(StringBuffer *buf, const char *line, int is_header)
+{
+const char *cell_tag = is_header ? "th" : "td";
+buffer_append(buf, "<tr>");
+line = skip_whitespace(line);
+if (*line == '|') line++; // Skip leading |
+while (*line) {
+// Skip whitespace before cell content
+while (*line && isspace_c(*line)) line++;
+// Find cell end (next | or end of line)
+const char *cell_start = line;
+while (*line && *line != '|') line++;
+// Trim trailing whitespace from cell
+const char *cell_end = line;
+while (cell_end > cell_start && isspace_c(*(cell_end - 1))) cell_end--;
+size_t cell_len = cell_end - cell_start;
+// Only output cell if we have content or more cells coming
+if (cell_len > 0 || *line == '|') {
+buffer_append(buf, "<");
+buffer_append(buf, cell_tag);
+buffer_append(buf, ">");
+if (cell_len > 0) {
+process_inline(buf, cell_start, cell_len);
+}
+buffer_append(buf, "</");
+buffer_append(buf, cell_tag);
+buffer_append(buf, ">");
+}
+if (*line == '|') line++; // Skip |
+// Check if this was the trailing |
+const char *rest = line;
+while (*rest && isspace_c(*rest)) rest++;
+if (!*rest) break; // End of line after trailing |
+}
+buffer_append(buf, "</tr>");
+}
+// Process inline markdown
+static void process_inline(StringBuffer *buf, const char *text, size_t len)
+{
+size_t i = 0;
+while (i < len) {
+// Links: [text](url)
+if (text[i] == '[') {
+size_t link_start = i + 1;
+size_t link_end = link_start;
+while (link_end < len && text[link_end] != ']') link_end++;
+if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
+size_t url_start = link_end + 2;
+size_t url_end = url_start;
+while (url_end < len && text[url_end] != ')') url_end++;
+if (url_end < len) {
+buffer_append(buf, "<a href=\"");
+buffer_append_n(buf, text + url_start, url_end - url_start);
+buffer_append(buf, "\">");
+buffer_append_n(buf, text + link_start, link_end - link_start);
+buffer_append(buf, "</a>");
+i = url_end + 1;
+continue;
+}
+}
+}
+// Images: ![alt](url)
+if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
+size_t alt_start = i + 2;
+size_t alt_end = alt_start;
+while (alt_end < len && text[alt_end] != ']') alt_end++;
+if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
+size_t url_start = alt_end + 2;
+size_t url_end = url_start;
+while (url_end < len && text[url_end] != ')') url_end++;
+if (url_end < len) {
+buffer_append(buf, "<img src=\"");
+buffer_append_n(buf, text + url_start, url_end - url_start);
+buffer_append(buf, "\" alt=\"");
+buffer_append_n(buf, text + alt_start, alt_end - alt_start);
+buffer_append(buf, "\">");
+i = url_end + 1;
+continue;
+}
+}
+}
+// Bold: **text** or __text__
+if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
+(text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
+char marker = text[i];
+size_t start = i + 2;
+size_t end = start;
+while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
+if (end + 1 < len) {
+buffer_append(buf, "<strong>");
+process_inline(buf, text + start, end - start);
+buffer_append(buf, "</strong>");
+i = end + 2;
+continue;
+}
+}
+// Strikethrough: ~~text~~
+if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
+size_t start = i + 2;
+size_t end = start;
+while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
+if (end + 1 < len) {
+buffer_append(buf, "<del>");
+process_inline(buf, text + start, end - start);
+buffer_append(buf, "</del>");
+i = end + 2;
+continue;
+}
+}
+// Italic: *text* or _text_
+if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
+char marker = text[i];
+size_t start = i + 1;
+size_t end = start;
+while (end < len && text[end] != marker) end++;
+if (end < len && end > start) {
+buffer_append(buf, "<em>");
+process_inline(buf, text + start, end - start);
+buffer_append(buf, "</em>");
+i = end + 1;
+continue;
+}
+}
+// Inline code: `code`
+if (text[i] == '`') {
+size_t start = i + 1;
+size_t end = start;
+while (end < len && text[end] != '`') end++;
+if (end < len) {
+buffer_append(buf, "<code>");
+buffer_append_n(buf, text + start, end - start);
+buffer_append(buf, "</code>");
+i = end + 1;
+continue;
+}
+}
+// HTML escape
+if (text[i] == '<') {
+buffer_append(buf, "&lt;");
+} else if (text[i] == '>') {
+buffer_append(buf, "&gt;");
+} else if (text[i] == '&') {
+buffer_append(buf, "&amp;");
+} else {
+buffer_append_char(buf, text[i]);
+}
+i++;
+}
+}
+// Append heading tag
+static void append_heading_tag(StringBuffer *buf, int level, int closing)
+{
+buffer_append_char(buf, '<');
+if (closing) buffer_append_char(buf, '/');
+buffer_append_char(buf, 'h');
+buffer_append_char(buf, '0' + level);
+buffer_append_char(buf, '>');
+}
+// Convert markdown to HTML
+WASM_EXPORT char *markdown_to_html(const char *markdown)
+{
+if (!markdown) return 0;
+StringBuffer *buf = buffer_create(4096);
+if (!buf) return 0;
+const char *ptr = markdown;
+const char *line_start;
+while (*ptr) {
+line_start = ptr;
+// Find end of line
+while (*ptr && *ptr != '\n') ptr++;
+size_t line_len = ptr - line_start;
+// Create line copy
+char *line = (char *)malloc(line_len + 1);
+if (!line) return buf->data;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+// Skip empty lines
+if (is_empty_line(line)) {
+if (*ptr == '\n') ptr++;
+continue;
+}
+// Headings
+int heading_level = count_heading_level(line);
+if (heading_level > 0) {
+const char *content = skip_whitespace(line);
+while (*content == '#') content++;
+content = skip_whitespace(content);
+append_heading_tag(buf, heading_level, 0);
+process_inline(buf, content, strlen(content));
+append_heading_tag(buf, heading_level, 1);
+if (*ptr == '\n') ptr++;
+continue;
+}
+// Code block
+if (starts_with(line, "```")) {
+buffer_append(buf, "<pre><code>");
+if (*ptr == '\n') ptr++;
+while (*ptr) {
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+char *code_line = (char *)malloc(line_len + 1);
+if (!code_line) break;
+memcpy(code_line, line_start, line_len);
+code_line[line_len] = '\0';
+if (starts_with(code_line, "```")) {
+if (*ptr == '\n') ptr++;
+break;
+}
+for (size_t i = 0; i < line_len; i++) {
+if (code_line[i] == '<') buffer_append(buf, "&lt;");
+else if (code_line[i] == '>') buffer_append(buf, "&gt;");
+else if (code_line[i] == '&') buffer_append(buf, "&amp;");
+else buffer_append_char(buf, code_line[i]);
+}
+buffer_append_char(buf, '\n');
+if (*ptr == '\n') ptr++;
+}
+buffer_append(buf, "</code></pre>");
+continue;
+}
+// Blockquote
+if (starts_with(line, ">")) {
+buffer_append(buf, "<blockquote>");
+while (1) {
+const char *content = skip_whitespace(line);
+if (*content == '>') content++;
+content = skip_whitespace(content);
+process_inline(buf, content, strlen(content));
+buffer_append_char(buf, ' ');
+if (*ptr == '\n') ptr++;
+if (!*ptr) break;
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+line = (char *)malloc(line_len + 1);
+if (!line) break;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+if (!starts_with(line, ">")) {
+ptr = line_start;
+break;
+}
+}
+buffer_append(buf, "</blockquote>");
+continue;
+}
+// Horizontal rule
+if (is_horizontal_rule(line)) {
+buffer_append(buf, "<hr>");
+if (*ptr == '\n') ptr++;
+continue;
+}
+// Unordered list
+if (is_unordered_list(line)) {
+buffer_append(buf, "<ul>");
+while (1) {
+const char *content = skip_whitespace(line);
+content += 2;
+buffer_append(buf, "<li>");
+process_inline(buf, content, strlen(content));
+buffer_append(buf, "</li>");
+if (*ptr == '\n') ptr++;
+if (!*ptr) break;
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+line = (char *)malloc(line_len + 1);
+if (!line) break;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+if (!is_unordered_list(line)) {
+ptr = line_start;
+break;
+}
+}
+buffer_append(buf, "</ul>");
+continue;
+}
+// Ordered list
+if (is_ordered_list(line)) {
+buffer_append(buf, "<ol>");
+while (1) {
+const char *content = skip_whitespace(line);
+while (*content && isdigit_c(*content)) content++;
+if (*content == '.') content++;
+content = skip_whitespace(content);
+buffer_append(buf, "<li>");
+process_inline(buf, content, strlen(content));
+buffer_append(buf, "</li>");
+if (*ptr == '\n') ptr++;
+if (!*ptr) break;
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+line = (char *)malloc(line_len + 1);
+if (!line) break;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+if (!is_ordered_list(line)) {
+ptr = line_start;
+break;
+}
+}
+buffer_append(buf, "</ol>");
+continue;
+}
+// Table
+if (is_table_row(line)) {
+// Check if next line is a separator (to confirm this is a table)
+const char *peek_ptr = ptr;
+if (*peek_ptr == '\n') peek_ptr++;
+const char *next_line_start = peek_ptr;
+while (*peek_ptr && *peek_ptr != '\n') peek_ptr++;
+size_t next_line_len = peek_ptr - next_line_start;
+char *next_line = (char *)malloc(next_line_len + 1);
+if (next_line) {
+memcpy(next_line, next_line_start, next_line_len);
+next_line[next_line_len] = '\0';
+if (is_table_separator(next_line)) {
+// It's a valid table
+buffer_append(buf, "<table>");
+// Header row
+buffer_append(buf, "<thead>");
+parse_table_row(buf, line, 1);
+buffer_append(buf, "</thead>");
+// Skip to after separator
+if (*ptr == '\n') ptr++;
+ptr = peek_ptr;
+if (*ptr == '\n') ptr++;
+// Body rows
+buffer_append(buf, "<tbody>");
+while (*ptr) {
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+line = (char *)malloc(line_len + 1);
+if (!line) break;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+if (!is_table_row(line) || is_empty_line(line)) {
+ptr = line_start;
+break;
+}
+parse_table_row(buf, line, 0);
+if (*ptr == '\n') ptr++;
+}
+buffer_append(buf, "</tbody>");
+buffer_append(buf, "</table>");
+continue;
+}
+}
+}
+// Paragraph
+buffer_append(buf, "<p>");
+while (1) {
+const char *content = skip_whitespace(line);
+process_inline(buf, content, strlen(content));
+if (*ptr == '\n') ptr++;
+if (!*ptr) break;
+line_start = ptr;
+while (*ptr && *ptr != '\n') ptr++;
+line_len = ptr - line_start;
+line = (char *)malloc(line_len + 1);
+if (!line) break;
+memcpy(line, line_start, line_len);
+line[line_len] = '\0';
+if (is_empty_line(line) ||
+count_heading_level(line) > 0 ||
+starts_with(line, "```") ||
+starts_with(line, ">") ||
+is_horizontal_rule(line) ||
+is_unordered_list(line) ||
+is_ordered_list(line) ||
+is_table_row(line)) {
+ptr = line_start;
+break;
+}
+buffer_append_char(buf, ' ');
+}
+buffer_append(buf, "</p>");
+}
+return buf->data;
+}
+// Get string length (for JS interop)
+WASM_EXPORT size_t markdown_strlen(const char *str)
+{
+return str ? strlen(str) : 0;
+}

Mercurial

comparison markdown_converter/wasm/markdown_to_html_wasm.c @ 156:cd35e600ae34