comparison markdown_converter/wasm/markdown_to_html_wasm.c @ 156:cd35e600ae34

[MarkDown Converter] Fixed few things and made a test
author June Park <parkjune1995@gmail.com>
date Mon, 12 Jan 2026 15:20:39 -0800
parents
children
comparison
equal deleted inserted replaced
155:3bb45eb67906 156:cd35e600ae34
1 /**
2 * Markdown to HTML Converter - Standalone WASM Implementation
3 * No libc dependencies - can be compiled with: clang --target=wasm32
4 */
5
// Gives a symbol default visibility; used on the entry points this module exposes.
#define WASM_EXPORT __attribute__((visibility("default")))

// No libc headers are available, so the basic integer types are declared here.
typedef unsigned long size_t;
typedef int int32_t;

// Simple bump allocator for WASM
#define HEAP_SIZE (1024 * 1024) // 1MB heap
// The storage itself is 8-byte aligned so that 8-byte aligned offsets into it
// yield 8-byte aligned pointers.
static char heap[HEAP_SIZE] __attribute__((aligned(8)));
static size_t heap_offset = 0;

// Hand out `size` bytes from the static heap.
// Returns a pointer aligned to 8 bytes, or 0 when the request does not fit in
// the space that is left.
WASM_EXPORT void *malloc(size_t size)
{
    // Round the next free offset up to an 8-byte boundary.
    size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;

    // Compare against the remaining space rather than computing
    // aligned_offset + size, which wraps around for very large requests and
    // would let them through.
    if (aligned_offset > HEAP_SIZE || size > HEAP_SIZE - aligned_offset) {
        return 0;
    }

    heap_offset = aligned_offset + size;
    return &heap[aligned_offset];
}

// Present only so callers can pair malloc() with free(); a bump allocator
// cannot release individual blocks, so this does nothing.
WASM_EXPORT void free(void *ptr)
{
    (void)ptr;
}

// Rewind the allocator to the start of the heap. Later malloc() calls reuse
// the same storage, so pointers handed out before the reset must no longer
// be used.
WASM_EXPORT void heap_reset(void)
{
    heap_offset = 0;
}
37
38 // String functions
// Number of bytes in the NUL-terminated string `s`, not counting the terminator.
static size_t strlen(const char *s)
{
    const char *end = s;
    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}
45
// Copy `n` bytes from `src` to `dest`, front to back, and return `dest`.
static void *memcpy(void *dest, const void *src, size_t n)
{
    char *out = (char *)dest;
    const char *in = (const char *)src;

    for (size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }
    return dest;
}
53
// Returns 1 for space, tab, newline, carriage return, form feed and vertical
// tab; 0 for everything else.
static int isspace_c(int c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
58
// Returns 1 when `c` is one of the characters '0'..'9', otherwise 0.
static int isdigit_c(int c)
{
    if (c < '0') {
        return 0;
    }
    return c <= '9';
}
63
// String buffer for building HTML output.
// `data` holds a NUL-terminated string of `length` bytes inside an allocation
// of `capacity` bytes, so length + 1 <= capacity at all times.
typedef struct {
    char *data;
    size_t length;
    size_t capacity;
} StringBuffer;

// Create an empty buffer with room for `initial_capacity` bytes.
// Returns 0 when either allocation fails.
static StringBuffer *buffer_create(size_t initial_capacity)
{
    // The terminator alone needs one byte; a zero capacity would also leave
    // the doubling in buffer_grow() with nothing to double.
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
    if (!buf) return 0;

    buf->data = (char *)malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return 0;
    }

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

// Make sure `needed` more bytes plus the terminator fit in the buffer,
// doubling the capacity until they do.
// Returns 1 when the space is available and 0 when the required size is not
// representable or the allocation fails; on failure the buffer is unchanged.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t max_size = (size_t)-1;

    // length + needed + 1 must not wrap around.
    if (needed >= max_size - buf->length) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > max_size / 2) {
            // Doubling again would overflow; settle for the exact size.
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = (char *)malloc(new_capacity);
    if (!new_data) return 0;

    memcpy(new_data, buf->data, buf->length + 1);
    free(buf->data);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append the NUL-terminated string `str`.
// If the buffer cannot grow the text is dropped and the buffer is left as is.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;

    // len + 1 also copies the terminator.
    memcpy(buf->data + buf->length, str, len + 1);
    buf->length += len;
}

// Append exactly `n` bytes starting at `str` and re-terminate the buffer.
// If the buffer cannot grow the bytes are dropped.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;

    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append the single character `c` and re-terminate the buffer.
// If the buffer cannot grow the character is dropped.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;

    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}
123
// Returns 1 when `line`, ignoring its leading whitespace, begins with
// `pattern`; returns 0 otherwise. An empty pattern always matches.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;
    while (*cursor && isspace_c(*cursor)) {
        cursor++;
    }

    // Walk both strings in step; running out of line before pattern is a
    // mismatch because the line's terminator differs from the pattern byte.
    while (*pattern != '\0') {
        if (*cursor != *pattern) {
            return 0;
        }
        cursor++;
        pattern++;
    }
    return 1;
}
134
// Heading level of `line`: the number of leading '#' characters (after any
// leading whitespace, at most 6) when they are followed by a space.
// Returns 0 when the line is not a heading.
static int count_heading_level(const char *line)
{
    while (*line && isspace_c(*line)) {
        line++;
    }

    int hashes = 0;
    for (; hashes < 6; hashes++) {
        if (line[hashes] != '#') {
            break;
        }
    }

    if (hashes == 0) {
        return 0;
    }
    // A seventh '#' or any other character right after the run disqualifies it.
    return line[hashes] == ' ' ? hashes : 0;
}
144
// Returns a pointer to the first non-whitespace character of `str`, or to its
// terminator when the string is all whitespace.
static const char *skip_whitespace(const char *str)
{
    const char *cursor;
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (!isspace_c(*cursor)) {
            break;
        }
    }
    return cursor;
}
151
// Returns 1 when `line` is empty or contains only whitespace, 0 otherwise.
static int is_empty_line(const char *line)
{
    // isspace_c() is false for the terminator, so this stops at the end of
    // the string or at the first visible character, whichever comes first.
    while (isspace_c(*line)) {
        line++;
    }
    return *line == '\0';
}
161
// Returns 1 when `line` is a horizontal rule: after leading whitespace it
// consists of at least three copies of one marker ('-', '*' or '_'),
// optionally separated by whitespace, and nothing else.
static int is_horizontal_rule(const char *line)
{
    const char *cursor = skip_whitespace(line);
    const char marker = *cursor;

    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *cursor != '\0'; cursor++) {
        if (*cursor == marker) {
            markers++;
            continue;
        }
        if (!isspace_c(*cursor)) {
            return 0;
        }
    }
    return markers >= 3;
}
177
// Returns 1 when `line`, after leading whitespace, starts with a bullet
// ('-', '*' or '+') followed by a space; 0 otherwise.
static int is_unordered_list(const char *line)
{
    const char *marker = skip_whitespace(line);

    switch (*marker) {
    case '-':
    case '*':
    case '+':
        return marker[1] == ' ';
    default:
        return 0;
    }
}
184
// Returns 1 when `line`, after leading whitespace, starts with an ordered
// list marker: one or more digits, a '.', and a space. Returns 0 otherwise.
static int is_ordered_list(const char *line)
{
    line = skip_whitespace(line);

    // The marker needs at least one digit; without this check a line such as
    // ". text" would be treated as a list item.
    if (!isdigit_c(*line)) return 0;

    while (isdigit_c(*line)) line++;
    return *line == '.' && line[1] == ' ';
}
192
// Returns 1 when the first non-whitespace character of `line` is '|',
// 0 otherwise.
static int is_table_row(const char *line)
{
    const char *first = skip_whitespace(line);
    if (*first == '|') {
        return 1;
    }
    return 0;
}
199
// Returns 1 when `line` is a table separator row such as "| --- | :-: |":
// it starts with '|' (after leading whitespace), contains at least one '-',
// and otherwise only '|', ':' and whitespace. Returns 0 otherwise.
static int is_table_separator(const char *line)
{
    const char *cursor = skip_whitespace(line);
    if (*cursor != '|') {
        return 0;
    }

    int saw_dash = 0;
    for (cursor++; *cursor != '\0'; cursor++) {
        switch (*cursor) {
        case '-':
            saw_dash = 1;
            break;
        case '|':
        case ':':
            break;
        default:
            if (!isspace_c(*cursor)) {
                return 0;
            }
            break;
        }
    }
    return saw_dash;
}
216
217 // Forward declaration for process_inline
218 static void process_inline(StringBuffer *buf, const char *text, size_t len);
219
220 // Parse table cells from a row and append to buffer
221 static void parse_table_row(StringBuffer *buf, const char *line, int is_header)
222 {
223 const char *cell_tag = is_header ? "th" : "td";
224
225 buffer_append(buf, "<tr>");
226
227 line = skip_whitespace(line);
228 if (*line == '|') line++; // Skip leading |
229
230 while (*line) {
231 // Skip whitespace before cell content
232 while (*line && isspace_c(*line)) line++;
233
234 // Find cell end (next | or end of line)
235 const char *cell_start = line;
236 while (*line && *line != '|') line++;
237
238 // Trim trailing whitespace from cell
239 const char *cell_end = line;
240 while (cell_end > cell_start && isspace_c(*(cell_end - 1))) cell_end--;
241
242 size_t cell_len = cell_end - cell_start;
243
244 // Only output cell if we have content or more cells coming
245 if (cell_len > 0 || *line == '|') {
246 buffer_append(buf, "<");
247 buffer_append(buf, cell_tag);
248 buffer_append(buf, ">");
249 if (cell_len > 0) {
250 process_inline(buf, cell_start, cell_len);
251 }
252 buffer_append(buf, "</");
253 buffer_append(buf, cell_tag);
254 buffer_append(buf, ">");
255 }
256
257 if (*line == '|') line++; // Skip |
258
259 // Check if this was the trailing |
260 const char *rest = line;
261 while (*rest && isspace_c(*rest)) rest++;
262 if (!*rest) break; // End of line after trailing |
263 }
264
265 buffer_append(buf, "</tr>");
266 }
267
268 // Process inline markdown
269 static void process_inline(StringBuffer *buf, const char *text, size_t len)
270 {
271 size_t i = 0;
272
273 while (i < len) {
274 // Links: [text](url)
275 if (text[i] == '[') {
276 size_t link_start = i + 1;
277 size_t link_end = link_start;
278 while (link_end < len && text[link_end] != ']') link_end++;
279
280 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
281 size_t url_start = link_end + 2;
282 size_t url_end = url_start;
283 while (url_end < len && text[url_end] != ')') url_end++;
284
285 if (url_end < len) {
286 buffer_append(buf, "<a href=\"");
287 buffer_append_n(buf, text + url_start, url_end - url_start);
288 buffer_append(buf, "\">");
289 buffer_append_n(buf, text + link_start, link_end - link_start);
290 buffer_append(buf, "</a>");
291 i = url_end + 1;
292 continue;
293 }
294 }
295 }
296
297 // Images: ![alt](url)
298 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
299 size_t alt_start = i + 2;
300 size_t alt_end = alt_start;
301 while (alt_end < len && text[alt_end] != ']') alt_end++;
302
303 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
304 size_t url_start = alt_end + 2;
305 size_t url_end = url_start;
306 while (url_end < len && text[url_end] != ')') url_end++;
307
308 if (url_end < len) {
309 buffer_append(buf, "<img src=\"");
310 buffer_append_n(buf, text + url_start, url_end - url_start);
311 buffer_append(buf, "\" alt=\"");
312 buffer_append_n(buf, text + alt_start, alt_end - alt_start);
313 buffer_append(buf, "\">");
314 i = url_end + 1;
315 continue;
316 }
317 }
318 }
319
320 // Bold: **text** or __text__
321 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
322 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
323 char marker = text[i];
324 size_t start = i + 2;
325 size_t end = start;
326 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
327
328 if (end + 1 < len) {
329 buffer_append(buf, "<strong>");
330 process_inline(buf, text + start, end - start);
331 buffer_append(buf, "</strong>");
332 i = end + 2;
333 continue;
334 }
335 }
336
337 // Strikethrough: ~~text~~
338 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
339 size_t start = i + 2;
340 size_t end = start;
341 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
342
343 if (end + 1 < len) {
344 buffer_append(buf, "<del>");
345 process_inline(buf, text + start, end - start);
346 buffer_append(buf, "</del>");
347 i = end + 2;
348 continue;
349 }
350 }
351
352 // Italic: *text* or _text_
353 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
354 char marker = text[i];
355 size_t start = i + 1;
356 size_t end = start;
357 while (end < len && text[end] != marker) end++;
358
359 if (end < len && end > start) {
360 buffer_append(buf, "<em>");
361 process_inline(buf, text + start, end - start);
362 buffer_append(buf, "</em>");
363 i = end + 1;
364 continue;
365 }
366 }
367
368 // Inline code: `code`
369 if (text[i] == '`') {
370 size_t start = i + 1;
371 size_t end = start;
372 while (end < len && text[end] != '`') end++;
373
374 if (end < len) {
375 buffer_append(buf, "<code>");
376 buffer_append_n(buf, text + start, end - start);
377 buffer_append(buf, "</code>");
378 i = end + 1;
379 continue;
380 }
381 }
382
383 // HTML escape
384 if (text[i] == '<') {
385 buffer_append(buf, "&lt;");
386 } else if (text[i] == '>') {
387 buffer_append(buf, "&gt;");
388 } else if (text[i] == '&') {
389 buffer_append(buf, "&amp;");
390 } else {
391 buffer_append_char(buf, text[i]);
392 }
393 i++;
394 }
395 }
396
397 // Append heading tag
398 static void append_heading_tag(StringBuffer *buf, int level, int closing)
399 {
400 buffer_append_char(buf, '<');
401 if (closing) buffer_append_char(buf, '/');
402 buffer_append_char(buf, 'h');
403 buffer_append_char(buf, '0' + level);
404 buffer_append_char(buf, '>');
405 }
406
407 // Convert markdown to HTML
408 WASM_EXPORT char *markdown_to_html(const char *markdown)
409 {
410 if (!markdown) return 0;
411
412 StringBuffer *buf = buffer_create(4096);
413 if (!buf) return 0;
414
415 const char *ptr = markdown;
416 const char *line_start;
417
418 while (*ptr) {
419 line_start = ptr;
420
421 // Find end of line
422 while (*ptr && *ptr != '\n') ptr++;
423 size_t line_len = ptr - line_start;
424
425 // Create line copy
426 char *line = (char *)malloc(line_len + 1);
427 if (!line) return buf->data;
428 memcpy(line, line_start, line_len);
429 line[line_len] = '\0';
430
431 // Skip empty lines
432 if (is_empty_line(line)) {
433 if (*ptr == '\n') ptr++;
434 continue;
435 }
436
437 // Headings
438 int heading_level = count_heading_level(line);
439 if (heading_level > 0) {
440 const char *content = skip_whitespace(line);
441 while (*content == '#') content++;
442 content = skip_whitespace(content);
443
444 append_heading_tag(buf, heading_level, 0);
445 process_inline(buf, content, strlen(content));
446 append_heading_tag(buf, heading_level, 1);
447
448 if (*ptr == '\n') ptr++;
449 continue;
450 }
451
452 // Code block
453 if (starts_with(line, "```")) {
454 buffer_append(buf, "<pre><code>");
455 if (*ptr == '\n') ptr++;
456
457 while (*ptr) {
458 line_start = ptr;
459 while (*ptr && *ptr != '\n') ptr++;
460 line_len = ptr - line_start;
461
462 char *code_line = (char *)malloc(line_len + 1);
463 if (!code_line) break;
464 memcpy(code_line, line_start, line_len);
465 code_line[line_len] = '\0';
466
467 if (starts_with(code_line, "```")) {
468 if (*ptr == '\n') ptr++;
469 break;
470 }
471
472 for (size_t i = 0; i < line_len; i++) {
473 if (code_line[i] == '<') buffer_append(buf, "&lt;");
474 else if (code_line[i] == '>') buffer_append(buf, "&gt;");
475 else if (code_line[i] == '&') buffer_append(buf, "&amp;");
476 else buffer_append_char(buf, code_line[i]);
477 }
478 buffer_append_char(buf, '\n');
479
480 if (*ptr == '\n') ptr++;
481 }
482
483 buffer_append(buf, "</code></pre>");
484 continue;
485 }
486
487 // Blockquote
488 if (starts_with(line, ">")) {
489 buffer_append(buf, "<blockquote>");
490
491 while (1) {
492 const char *content = skip_whitespace(line);
493 if (*content == '>') content++;
494 content = skip_whitespace(content);
495 process_inline(buf, content, strlen(content));
496 buffer_append_char(buf, ' ');
497
498 if (*ptr == '\n') ptr++;
499 if (!*ptr) break;
500
501 line_start = ptr;
502 while (*ptr && *ptr != '\n') ptr++;
503 line_len = ptr - line_start;
504
505 line = (char *)malloc(line_len + 1);
506 if (!line) break;
507 memcpy(line, line_start, line_len);
508 line[line_len] = '\0';
509
510 if (!starts_with(line, ">")) {
511 ptr = line_start;
512 break;
513 }
514 }
515
516 buffer_append(buf, "</blockquote>");
517 continue;
518 }
519
520 // Horizontal rule
521 if (is_horizontal_rule(line)) {
522 buffer_append(buf, "<hr>");
523 if (*ptr == '\n') ptr++;
524 continue;
525 }
526
527 // Unordered list
528 if (is_unordered_list(line)) {
529 buffer_append(buf, "<ul>");
530
531 while (1) {
532 const char *content = skip_whitespace(line);
533 content += 2;
534
535 buffer_append(buf, "<li>");
536 process_inline(buf, content, strlen(content));
537 buffer_append(buf, "</li>");
538
539 if (*ptr == '\n') ptr++;
540 if (!*ptr) break;
541
542 line_start = ptr;
543 while (*ptr && *ptr != '\n') ptr++;
544 line_len = ptr - line_start;
545
546 line = (char *)malloc(line_len + 1);
547 if (!line) break;
548 memcpy(line, line_start, line_len);
549 line[line_len] = '\0';
550
551 if (!is_unordered_list(line)) {
552 ptr = line_start;
553 break;
554 }
555 }
556
557 buffer_append(buf, "</ul>");
558 continue;
559 }
560
561 // Ordered list
562 if (is_ordered_list(line)) {
563 buffer_append(buf, "<ol>");
564
565 while (1) {
566 const char *content = skip_whitespace(line);
567 while (*content && isdigit_c(*content)) content++;
568 if (*content == '.') content++;
569 content = skip_whitespace(content);
570
571 buffer_append(buf, "<li>");
572 process_inline(buf, content, strlen(content));
573 buffer_append(buf, "</li>");
574
575 if (*ptr == '\n') ptr++;
576 if (!*ptr) break;
577
578 line_start = ptr;
579 while (*ptr && *ptr != '\n') ptr++;
580 line_len = ptr - line_start;
581
582 line = (char *)malloc(line_len + 1);
583 if (!line) break;
584 memcpy(line, line_start, line_len);
585 line[line_len] = '\0';
586
587 if (!is_ordered_list(line)) {
588 ptr = line_start;
589 break;
590 }
591 }
592
593 buffer_append(buf, "</ol>");
594 continue;
595 }
596
597 // Table
598 if (is_table_row(line)) {
599 // Check if next line is a separator (to confirm this is a table)
600 const char *peek_ptr = ptr;
601 if (*peek_ptr == '\n') peek_ptr++;
602
603 const char *next_line_start = peek_ptr;
604 while (*peek_ptr && *peek_ptr != '\n') peek_ptr++;
605 size_t next_line_len = peek_ptr - next_line_start;
606
607 char *next_line = (char *)malloc(next_line_len + 1);
608 if (next_line) {
609 memcpy(next_line, next_line_start, next_line_len);
610 next_line[next_line_len] = '\0';
611
612 if (is_table_separator(next_line)) {
613 // It's a valid table
614 buffer_append(buf, "<table>");
615
616 // Header row
617 buffer_append(buf, "<thead>");
618 parse_table_row(buf, line, 1);
619 buffer_append(buf, "</thead>");
620
621 // Skip to after separator
622 if (*ptr == '\n') ptr++;
623 ptr = peek_ptr;
624 if (*ptr == '\n') ptr++;
625
626 // Body rows
627 buffer_append(buf, "<tbody>");
628 while (*ptr) {
629 line_start = ptr;
630 while (*ptr && *ptr != '\n') ptr++;
631 line_len = ptr - line_start;
632
633 line = (char *)malloc(line_len + 1);
634 if (!line) break;
635 memcpy(line, line_start, line_len);
636 line[line_len] = '\0';
637
638 if (!is_table_row(line) || is_empty_line(line)) {
639 ptr = line_start;
640 break;
641 }
642
643 parse_table_row(buf, line, 0);
644 if (*ptr == '\n') ptr++;
645 }
646 buffer_append(buf, "</tbody>");
647
648 buffer_append(buf, "</table>");
649 continue;
650 }
651 }
652 }
653
654 // Paragraph
655 buffer_append(buf, "<p>");
656
657 while (1) {
658 const char *content = skip_whitespace(line);
659 process_inline(buf, content, strlen(content));
660
661 if (*ptr == '\n') ptr++;
662 if (!*ptr) break;
663
664 line_start = ptr;
665 while (*ptr && *ptr != '\n') ptr++;
666 line_len = ptr - line_start;
667
668 line = (char *)malloc(line_len + 1);
669 if (!line) break;
670 memcpy(line, line_start, line_len);
671 line[line_len] = '\0';
672
673 if (is_empty_line(line) ||
674 count_heading_level(line) > 0 ||
675 starts_with(line, "```") ||
676 starts_with(line, ">") ||
677 is_horizontal_rule(line) ||
678 is_unordered_list(line) ||
679 is_ordered_list(line) ||
680 is_table_row(line)) {
681 ptr = line_start;
682 break;
683 }
684
685 buffer_append_char(buf, ' ');
686 }
687
688 buffer_append(buf, "</p>");
689 }
690
691 return buf->data;
692 }
693
694 // Get string length (for JS interop)
695 WASM_EXPORT size_t markdown_strlen(const char *str)
696 {
697 return str ? strlen(str) : 0;
698 }