Mercurial
comparison markdown_converter/markdown_to_html_wasm.c @ 154:bdcc610eeed8
[Markdown Converter][GuiZe] Added a Markdown converter in C and WASM rule sets. Needs further review, as I haven't looked at it yet. Written by Claude.
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Mon, 12 Jan 2026 09:11:58 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 153:790930d9bb90 | 154:bdcc610eeed8 |
|---|---|
| 1 /** | |
| 2 * Markdown to HTML Converter - Standalone WASM Implementation | |
| 3 * No libc dependencies - can be compiled with: clang --target=wasm32 | |
| 4 */ | |
| 5 | |
#define WASM_EXPORT __attribute__((visibility("default")))

// Freestanding stand-ins for the <stddef.h>/<stdint.h> types (this file is
// built without libc). The compiler-provided macros keep the typedefs in
// agreement with the target's own size_t / int32_t.
typedef __SIZE_TYPE__ size_t;
typedef __INT32_TYPE__ int32_t;

// Simple bump allocator for WASM
#define HEAP_SIZE (1024 * 1024) // 1MB heap
// The heap itself is 8-byte aligned so that 8-byte-aligned offsets yield
// 8-byte-aligned pointers.
static char heap[HEAP_SIZE] __attribute__((aligned(8)));
static size_t heap_offset = 0;

// Allocate `size` bytes from the static heap.
//
// Returns a pointer aligned to 8 bytes, or 0 when the request does not fit
// in the space that is left. A failed request leaves the allocator state
// untouched. The memory is not zeroed.
WASM_EXPORT void *malloc(size_t size)
{
    // Align to 8 bytes
    size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;

    // Compare against the remaining space rather than computing
    // aligned_offset + size, which wraps around for very large sizes and
    // would let an oversized request through.
    if (aligned_offset > (size_t)HEAP_SIZE) return 0;
    if (size > (size_t)HEAP_SIZE - aligned_offset) return 0;

    void *ptr = &heap[aligned_offset];
    heap_offset = aligned_offset + size;
    return ptr;
}

// Release a block. The bump allocator cannot reclaim individual blocks, so
// this does nothing; memory is only recovered by heap_reset().
WASM_EXPORT void free(void *ptr)
{
    // Simple bump allocator - no actual free
    (void)ptr;
}

// Discard every allocation at once by rewinding the heap to its start.
// Any pointer previously returned by malloc() will be handed out again by
// later allocations.
WASM_EXPORT void heap_reset(void)
{
    heap_offset = 0;
}
| 37 | |
// String functions

// Return the number of characters in `s` before its terminating NUL.
static size_t strlen(const char *s)
{
    const char *end = s;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}
| 45 | |
// Copy `n` bytes from `src` to `dest`, front to back, and return `dest`.
static void *memcpy(void *dest, const void *src, size_t n)
{
    char *out = (char *)dest;
    const char *in = (const char *)src;

    for (size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }
    return dest;
}
| 53 | |
// Return 1 when `c` is one of the six ASCII whitespace characters
// (space, tab, newline, carriage return, form feed, vertical tab), else 0.
static int isspace_c(int c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
| 58 | |
// Return 1 when `c` is an ASCII decimal digit ('0'..'9'), else 0.
static int isdigit_c(int c)
{
    if (c < '0') {
        return 0;
    }
    return c <= '9';
}
| 63 | |
// String buffer for building HTML output
typedef struct {
    char *data;      // NUL-terminated contents
    size_t length;   // number of bytes in use, not counting the NUL
    size_t capacity; // number of bytes allocated for `data`
} StringBuffer;

// Create an empty buffer with room for `initial_capacity` bytes.
// Returns 0 when either allocation fails.
static StringBuffer *buffer_create(size_t initial_capacity)
{
    // The terminating NUL always needs one byte.
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
    if (!buf) return 0;

    buf->data = (char *)malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return 0;
    }

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

// Make sure `needed` more bytes plus the terminating NUL fit in `buf`,
// doubling the capacity until they do.
//
// Returns 1 on success and 0 when the request cannot be satisfied (size
// overflow or allocation failure); on failure `buf` is left unchanged, so
// callers must not write the extra bytes.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    // length + needed + 1 must be representable.
    if (needed >= size_max - buf->length) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    // Start from at least 1 so that doubling always makes progress.
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            // Doubling would wrap; settle for the exact size.
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = (char *)malloc(new_capacity);
    if (!new_data) return 0;

    memcpy(new_data, buf->data, buf->length + 1);
    free(buf->data);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append the NUL-terminated string `str`. If the buffer cannot grow, the
// append is dropped and the buffer keeps its previous contents.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    // len + 1 also copies the terminator.
    memcpy(buf->data + buf->length, str, len + 1);
    buf->length += len;
}

// Append exactly `n` bytes starting at `str` and re-terminate the buffer.
// If the buffer cannot grow, the append is dropped.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append the single character `c` and re-terminate the buffer.
// If the buffer cannot grow, the append is dropped.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}
| 123 | |
// Return 1 when `line`, ignoring its leading whitespace, begins with
// `pattern`; otherwise return 0. An empty pattern always matches.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;
    while (*cursor && isspace_c(*cursor)) {
        cursor++;
    }

    // Walk both strings in step. If the line runs out first, its NUL
    // differs from the pattern character and the comparison fails there.
    for (const char *want = pattern; *want != '\0'; want++, cursor++) {
        if (*cursor != *want) {
            return 0;
        }
    }
    return 1;
}
| 134 | |
// Return the heading level (1-6) of `line`, or 0 when it is not a heading.
// A heading is leading whitespace, then one to six '#' characters, then a
// space. A seventh '#' makes the line a non-heading.
static int count_heading_level(const char *line)
{
    const char *p = line;
    while (*p && isspace_c(*p)) {
        p++;
    }

    int hashes = 0;
    while (hashes < 6 && p[hashes] == '#') {
        hashes++;
    }

    if (hashes == 0) {
        return 0;
    }
    return p[hashes] == ' ' ? hashes : 0;
}
| 144 | |
// Return a pointer to the first non-whitespace character of `str`, or to
// its terminating NUL when the string is all whitespace.
static const char *skip_whitespace(const char *str)
{
    const char *p;
    for (p = str; *p != '\0'; p++) {
        if (!isspace_c(*p)) {
            break;
        }
    }
    return p;
}
| 151 | |
// Return 1 when `line` is empty or contains only whitespace, else 0.
static int is_empty_line(const char *line)
{
    const char *p = line;
    // isspace_c() rejects NUL, so this stops at the end of the string.
    while (isspace_c(*p)) {
        p++;
    }
    return *p == '\0';
}
| 161 | |
// Return 1 when `line` is a horizontal rule: after leading whitespace it
// consists only of whitespace and at least three copies of one marker
// character ('-', '*' or '_'). Otherwise return 0.
static int is_horizontal_rule(const char *line)
{
    const char *p = skip_whitespace(line);
    const char marker = *p;

    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            markers++;
            continue;
        }
        if (!isspace_c(*p)) {
            return 0;
        }
    }
    return markers >= 3;
}
| 177 | |
// Return 1 when `line`, after leading whitespace, starts with a bullet
// ('-', '*' or '+') immediately followed by a space; otherwise return 0.
static int is_unordered_list(const char *line)
{
    const char *p = skip_whitespace(line);

    switch (*p) {
    case '-':
    case '*':
    case '+':
        return p[1] == ' ';
    default:
        return 0;
    }
}
| 184 | |
// Return 1 when `line`, after leading whitespace, starts with one or more
// decimal digits followed by ". " (for example "1. item" or "12. item");
// otherwise return 0.
static int is_ordered_list(const char *line)
{
    const char *p = skip_whitespace(line);
    const char *first_digit = p;

    // isdigit_c() rejects NUL, so this stops at the end of the string.
    while (isdigit_c(*p)) p++;

    // A list marker needs a number: a bare ". text" is ordinary text.
    if (p == first_digit) return 0;

    return *p == '.' && p[1] == ' ';
}
| 192 | |
| 193 // Process inline markdown | |
| 194 static void process_inline(StringBuffer *buf, const char *text, size_t len) | |
| 195 { | |
| 196 size_t i = 0; | |
| 197 | |
| 198 while (i < len) { | |
| 199 // Links: [text](url) | |
| 200 if (text[i] == '[') { | |
| 201 size_t link_start = i + 1; | |
| 202 size_t link_end = link_start; | |
| 203 while (link_end < len && text[link_end] != ']') link_end++; | |
| 204 | |
| 205 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') { | |
| 206 size_t url_start = link_end + 2; | |
| 207 size_t url_end = url_start; | |
| 208 while (url_end < len && text[url_end] != ')') url_end++; | |
| 209 | |
| 210 if (url_end < len) { | |
| 211 buffer_append(buf, "<a href=\""); | |
| 212 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 213 buffer_append(buf, "\">"); | |
| 214 buffer_append_n(buf, text + link_start, link_end - link_start); | |
| 215 buffer_append(buf, "</a>"); | |
| 216 i = url_end + 1; | |
| 217 continue; | |
| 218 } | |
| 219 } | |
| 220 } | |
| 221 | |
| 222 // Images:  | |
| 223 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') { | |
| 224 size_t alt_start = i + 2; | |
| 225 size_t alt_end = alt_start; | |
| 226 while (alt_end < len && text[alt_end] != ']') alt_end++; | |
| 227 | |
| 228 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') { | |
| 229 size_t url_start = alt_end + 2; | |
| 230 size_t url_end = url_start; | |
| 231 while (url_end < len && text[url_end] != ')') url_end++; | |
| 232 | |
| 233 if (url_end < len) { | |
| 234 buffer_append(buf, "<img src=\""); | |
| 235 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 236 buffer_append(buf, "\" alt=\""); | |
| 237 buffer_append_n(buf, text + alt_start, alt_end - alt_start); | |
| 238 buffer_append(buf, "\">"); | |
| 239 i = url_end + 1; | |
| 240 continue; | |
| 241 } | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 // Bold: **text** or __text__ | |
| 246 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') || | |
| 247 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) { | |
| 248 char marker = text[i]; | |
| 249 size_t start = i + 2; | |
| 250 size_t end = start; | |
| 251 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++; | |
| 252 | |
| 253 if (end + 1 < len) { | |
| 254 buffer_append(buf, "<strong>"); | |
| 255 process_inline(buf, text + start, end - start); | |
| 256 buffer_append(buf, "</strong>"); | |
| 257 i = end + 2; | |
| 258 continue; | |
| 259 } | |
| 260 } | |
| 261 | |
| 262 // Strikethrough: ~~text~~ | |
| 263 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') { | |
| 264 size_t start = i + 2; | |
| 265 size_t end = start; | |
| 266 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++; | |
| 267 | |
| 268 if (end + 1 < len) { | |
| 269 buffer_append(buf, "<del>"); | |
| 270 process_inline(buf, text + start, end - start); | |
| 271 buffer_append(buf, "</del>"); | |
| 272 i = end + 2; | |
| 273 continue; | |
| 274 } | |
| 275 } | |
| 276 | |
| 277 // Italic: *text* or _text_ | |
| 278 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) { | |
| 279 char marker = text[i]; | |
| 280 size_t start = i + 1; | |
| 281 size_t end = start; | |
| 282 while (end < len && text[end] != marker) end++; | |
| 283 | |
| 284 if (end < len && end > start) { | |
| 285 buffer_append(buf, "<em>"); | |
| 286 process_inline(buf, text + start, end - start); | |
| 287 buffer_append(buf, "</em>"); | |
| 288 i = end + 1; | |
| 289 continue; | |
| 290 } | |
| 291 } | |
| 292 | |
| 293 // Inline code: `code` | |
| 294 if (text[i] == '`') { | |
| 295 size_t start = i + 1; | |
| 296 size_t end = start; | |
| 297 while (end < len && text[end] != '`') end++; | |
| 298 | |
| 299 if (end < len) { | |
| 300 buffer_append(buf, "<code>"); | |
| 301 buffer_append_n(buf, text + start, end - start); | |
| 302 buffer_append(buf, "</code>"); | |
| 303 i = end + 1; | |
| 304 continue; | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 // HTML escape | |
| 309 if (text[i] == '<') { | |
| 310 buffer_append(buf, "<"); | |
| 311 } else if (text[i] == '>') { | |
| 312 buffer_append(buf, ">"); | |
| 313 } else if (text[i] == '&') { | |
| 314 buffer_append(buf, "&"); | |
| 315 } else { | |
| 316 buffer_append_char(buf, text[i]); | |
| 317 } | |
| 318 i++; | |
| 319 } | |
| 320 } | |
| 321 | |
| 322 // Append heading tag | |
| 323 static void append_heading_tag(StringBuffer *buf, int level, int closing) | |
| 324 { | |
| 325 buffer_append_char(buf, '<'); | |
| 326 if (closing) buffer_append_char(buf, '/'); | |
| 327 buffer_append_char(buf, 'h'); | |
| 328 buffer_append_char(buf, '0' + level); | |
| 329 buffer_append_char(buf, '>'); | |
| 330 } | |
| 331 | |
| 332 // Convert markdown to HTML | |
| 333 WASM_EXPORT char *markdown_to_html(const char *markdown) | |
| 334 { | |
| 335 if (!markdown) return 0; | |
| 336 | |
| 337 StringBuffer *buf = buffer_create(4096); | |
| 338 if (!buf) return 0; | |
| 339 | |
| 340 const char *ptr = markdown; | |
| 341 const char *line_start; | |
| 342 | |
| 343 while (*ptr) { | |
| 344 line_start = ptr; | |
| 345 | |
| 346 // Find end of line | |
| 347 while (*ptr && *ptr != '\n') ptr++; | |
| 348 size_t line_len = ptr - line_start; | |
| 349 | |
| 350 // Create line copy | |
| 351 char *line = (char *)malloc(line_len + 1); | |
| 352 if (!line) return buf->data; | |
| 353 memcpy(line, line_start, line_len); | |
| 354 line[line_len] = '\0'; | |
| 355 | |
| 356 // Skip empty lines | |
| 357 if (is_empty_line(line)) { | |
| 358 if (*ptr == '\n') ptr++; | |
| 359 continue; | |
| 360 } | |
| 361 | |
| 362 // Headings | |
| 363 int heading_level = count_heading_level(line); | |
| 364 if (heading_level > 0) { | |
| 365 const char *content = skip_whitespace(line); | |
| 366 while (*content == '#') content++; | |
| 367 content = skip_whitespace(content); | |
| 368 | |
| 369 append_heading_tag(buf, heading_level, 0); | |
| 370 process_inline(buf, content, strlen(content)); | |
| 371 append_heading_tag(buf, heading_level, 1); | |
| 372 | |
| 373 if (*ptr == '\n') ptr++; | |
| 374 continue; | |
| 375 } | |
| 376 | |
| 377 // Code block | |
| 378 if (starts_with(line, "```")) { | |
| 379 buffer_append(buf, "<pre><code>"); | |
| 380 if (*ptr == '\n') ptr++; | |
| 381 | |
| 382 while (*ptr) { | |
| 383 line_start = ptr; | |
| 384 while (*ptr && *ptr != '\n') ptr++; | |
| 385 line_len = ptr - line_start; | |
| 386 | |
| 387 char *code_line = (char *)malloc(line_len + 1); | |
| 388 if (!code_line) break; | |
| 389 memcpy(code_line, line_start, line_len); | |
| 390 code_line[line_len] = '\0'; | |
| 391 | |
| 392 if (starts_with(code_line, "```")) { | |
| 393 if (*ptr == '\n') ptr++; | |
| 394 break; | |
| 395 } | |
| 396 | |
| 397 for (size_t i = 0; i < line_len; i++) { | |
| 398 if (code_line[i] == '<') buffer_append(buf, "<"); | |
| 399 else if (code_line[i] == '>') buffer_append(buf, ">"); | |
| 400 else if (code_line[i] == '&') buffer_append(buf, "&"); | |
| 401 else buffer_append_char(buf, code_line[i]); | |
| 402 } | |
| 403 buffer_append_char(buf, '\n'); | |
| 404 | |
| 405 if (*ptr == '\n') ptr++; | |
| 406 } | |
| 407 | |
| 408 buffer_append(buf, "</code></pre>"); | |
| 409 continue; | |
| 410 } | |
| 411 | |
| 412 // Blockquote | |
| 413 if (starts_with(line, ">")) { | |
| 414 buffer_append(buf, "<blockquote>"); | |
| 415 | |
| 416 while (1) { | |
| 417 const char *content = skip_whitespace(line); | |
| 418 if (*content == '>') content++; | |
| 419 content = skip_whitespace(content); | |
| 420 process_inline(buf, content, strlen(content)); | |
| 421 buffer_append_char(buf, ' '); | |
| 422 | |
| 423 if (*ptr == '\n') ptr++; | |
| 424 if (!*ptr) break; | |
| 425 | |
| 426 line_start = ptr; | |
| 427 while (*ptr && *ptr != '\n') ptr++; | |
| 428 line_len = ptr - line_start; | |
| 429 | |
| 430 line = (char *)malloc(line_len + 1); | |
| 431 if (!line) break; | |
| 432 memcpy(line, line_start, line_len); | |
| 433 line[line_len] = '\0'; | |
| 434 | |
| 435 if (!starts_with(line, ">")) { | |
| 436 ptr = line_start; | |
| 437 break; | |
| 438 } | |
| 439 } | |
| 440 | |
| 441 buffer_append(buf, "</blockquote>"); | |
| 442 continue; | |
| 443 } | |
| 444 | |
| 445 // Horizontal rule | |
| 446 if (is_horizontal_rule(line)) { | |
| 447 buffer_append(buf, "<hr>"); | |
| 448 if (*ptr == '\n') ptr++; | |
| 449 continue; | |
| 450 } | |
| 451 | |
| 452 // Unordered list | |
| 453 if (is_unordered_list(line)) { | |
| 454 buffer_append(buf, "<ul>"); | |
| 455 | |
| 456 while (1) { | |
| 457 const char *content = skip_whitespace(line); | |
| 458 content += 2; | |
| 459 | |
| 460 buffer_append(buf, "<li>"); | |
| 461 process_inline(buf, content, strlen(content)); | |
| 462 buffer_append(buf, "</li>"); | |
| 463 | |
| 464 if (*ptr == '\n') ptr++; | |
| 465 if (!*ptr) break; | |
| 466 | |
| 467 line_start = ptr; | |
| 468 while (*ptr && *ptr != '\n') ptr++; | |
| 469 line_len = ptr - line_start; | |
| 470 | |
| 471 line = (char *)malloc(line_len + 1); | |
| 472 if (!line) break; | |
| 473 memcpy(line, line_start, line_len); | |
| 474 line[line_len] = '\0'; | |
| 475 | |
| 476 if (!is_unordered_list(line)) { | |
| 477 ptr = line_start; | |
| 478 break; | |
| 479 } | |
| 480 } | |
| 481 | |
| 482 buffer_append(buf, "</ul>"); | |
| 483 continue; | |
| 484 } | |
| 485 | |
| 486 // Ordered list | |
| 487 if (is_ordered_list(line)) { | |
| 488 buffer_append(buf, "<ol>"); | |
| 489 | |
| 490 while (1) { | |
| 491 const char *content = skip_whitespace(line); | |
| 492 while (*content && isdigit_c(*content)) content++; | |
| 493 if (*content == '.') content++; | |
| 494 content = skip_whitespace(content); | |
| 495 | |
| 496 buffer_append(buf, "<li>"); | |
| 497 process_inline(buf, content, strlen(content)); | |
| 498 buffer_append(buf, "</li>"); | |
| 499 | |
| 500 if (*ptr == '\n') ptr++; | |
| 501 if (!*ptr) break; | |
| 502 | |
| 503 line_start = ptr; | |
| 504 while (*ptr && *ptr != '\n') ptr++; | |
| 505 line_len = ptr - line_start; | |
| 506 | |
| 507 line = (char *)malloc(line_len + 1); | |
| 508 if (!line) break; | |
| 509 memcpy(line, line_start, line_len); | |
| 510 line[line_len] = '\0'; | |
| 511 | |
| 512 if (!is_ordered_list(line)) { | |
| 513 ptr = line_start; | |
| 514 break; | |
| 515 } | |
| 516 } | |
| 517 | |
| 518 buffer_append(buf, "</ol>"); | |
| 519 continue; | |
| 520 } | |
| 521 | |
| 522 // Paragraph | |
| 523 buffer_append(buf, "<p>"); | |
| 524 | |
| 525 while (1) { | |
| 526 const char *content = skip_whitespace(line); | |
| 527 process_inline(buf, content, strlen(content)); | |
| 528 | |
| 529 if (*ptr == '\n') ptr++; | |
| 530 if (!*ptr) break; | |
| 531 | |
| 532 line_start = ptr; | |
| 533 while (*ptr && *ptr != '\n') ptr++; | |
| 534 line_len = ptr - line_start; | |
| 535 | |
| 536 line = (char *)malloc(line_len + 1); | |
| 537 if (!line) break; | |
| 538 memcpy(line, line_start, line_len); | |
| 539 line[line_len] = '\0'; | |
| 540 | |
| 541 if (is_empty_line(line) || | |
| 542 count_heading_level(line) > 0 || | |
| 543 starts_with(line, "```") || | |
| 544 starts_with(line, ">") || | |
| 545 is_horizontal_rule(line) || | |
| 546 is_unordered_list(line) || | |
| 547 is_ordered_list(line)) { | |
| 548 ptr = line_start; | |
| 549 break; | |
| 550 } | |
| 551 | |
| 552 buffer_append_char(buf, ' '); | |
| 553 } | |
| 554 | |
| 555 buffer_append(buf, "</p>"); | |
| 556 } | |
| 557 | |
| 558 return buf->data; | |
| 559 } | |
| 560 | |
| 561 // Get string length (for JS interop) | |
| 562 WASM_EXPORT size_t markdown_strlen(const char *str) | |
| 563 { | |
| 564 return str ? strlen(str) : 0; | |
| 565 } |