comparison markdown_converter/markdown_to_html_wasm.c @ 154:bdcc610eeed8

[Markdown Converter][GuiZe] Added markdown converter in C and wasm rule sets. Needs further view on this as I haven't taken a look. Written by Claude.
author June Park <parkjune1995@gmail.com>
date Mon, 12 Jan 2026 09:11:58 -0800
parents
children
comparison
equal deleted inserted replaced
153:790930d9bb90 154:bdcc610eeed8
1 /**
2 * Markdown to HTML Converter - Standalone WASM Implementation
3 * No libc dependencies - can be compiled with: clang --target=wasm32
4 */
5
#define WASM_EXPORT __attribute__((visibility("default")))

typedef unsigned long size_t;
typedef int int32_t;

// Simple bump allocator for WASM.
// Allocations are carved sequentially out of one static array; individual
// blocks are never reclaimed, only the whole heap via heap_reset().
#define HEAP_SIZE (1024 * 1024) // 1MB heap
// The base is aligned explicitly so that 8-byte aligned offsets really yield
// 8-byte aligned pointers.
static char heap[HEAP_SIZE] __attribute__((aligned(8)));
static size_t heap_offset = 0;

/**
 * Allocate `size` bytes from the static heap.
 *
 * Returns a pointer at an 8-byte aligned heap offset, or 0 when the request
 * does not fit in the space that is left. The heap state is unchanged on
 * failure.
 */
WASM_EXPORT void *malloc(size_t size)
{
    // Round the bump pointer up to the next multiple of 8.
    size_t aligned_offset = (heap_offset + 7) & ~(size_t)7;

    // Compare against the remaining space instead of computing
    // aligned_offset + size: that sum wraps around for very large requests
    // and would let them through.
    if (aligned_offset > HEAP_SIZE || size > HEAP_SIZE - aligned_offset) {
        return 0;
    }

    void *ptr = &heap[aligned_offset];
    heap_offset = aligned_offset + size;
    return ptr;
}

/**
 * Provided for API compatibility only: the bump allocator cannot release
 * individual blocks, so this does nothing.
 */
WASM_EXPORT void free(void *ptr)
{
    (void)ptr;
}

/**
 * Discard every allocation at once by rewinding the bump pointer to the start
 * of the heap. Pointers handed out earlier will be reused by later
 * allocations.
 */
WASM_EXPORT void heap_reset(void)
{
    heap_offset = 0;
}
37
38 // String functions
/* Return the number of bytes in `s` before its terminating NUL. */
static size_t strlen(const char *s)
{
    const char *end = s;

    while (*end != '\0') {
        end++;
    }
    return (size_t)(end - s);
}
45
/*
 * Copy `n` bytes from `src` to `dest`, front to back, and return `dest`.
 */
static void *memcpy(void *dest, const void *src, size_t n)
{
    char *out = (char *)dest;
    const char *in = (const char *)src;

    for (size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }
    return dest;
}
53
/* Return 1 for space, tab, newline, carriage return, form feed or vertical
 * tab; 0 for anything else. */
static int isspace_c(int c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
58
/* Return 1 when `c` is one of the ASCII digits '0'..'9', otherwise 0. */
static int isdigit_c(int c)
{
    if (c < '0') {
        return 0;
    }
    if (c > '9') {
        return 0;
    }
    return 1;
}
63
// Growable, always NUL-terminated string buffer used to build the HTML output.
typedef struct {
    char *data;      // contents, terminated by '\0'
    size_t length;   // bytes in use, not counting the terminator
    size_t capacity; // bytes allocated for data
} StringBuffer;

/**
 * Create an empty buffer with room for `initial_capacity` bytes.
 *
 * A capacity of 0 is raised to 1 so the terminator always fits and so that
 * doubling in buffer_grow() makes progress.
 * Returns 0 if either allocation fails.
 */
static StringBuffer *buffer_create(size_t initial_capacity)
{
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
    if (!buf) return 0;

    buf->data = (char *)malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return 0;
    }

    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

/**
 * Make sure `needed` more bytes plus the terminator fit in the buffer.
 *
 * Capacity grows by doubling. Returns 1 when the space is available and 0
 * when it is not (allocation failure or size overflow); on failure the buffer
 * is left untouched and still valid.
 */
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    // length + needed + 1 must not wrap around.
    if (needed >= size_max - buf->length) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            // Doubling would overflow; fall back to the exact size.
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = (char *)malloc(new_capacity);
    if (!new_data) return 0;

    memcpy(new_data, buf->data, buf->length + 1);
    free(buf->data);
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

/**
 * Append the NUL-terminated string `str`.
 * If the buffer cannot grow, the append is dropped instead of writing past
 * the allocation.
 */
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;

    // len + 1 copies the terminator along with the text.
    memcpy(buf->data + buf->length, str, len + 1);
    buf->length += len;
}

/**
 * Append exactly `n` bytes starting at `str` and re-terminate the buffer.
 * Dropped without effect if the buffer cannot grow.
 */
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;

    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

/**
 * Append the single character `c` and re-terminate the buffer.
 * Dropped without effect if the buffer cannot grow.
 */
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;

    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}
123
// Return 1 if `line`, once its leading whitespace is skipped, begins with
// `pattern`; otherwise 0. An empty pattern always matches.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;

    while (*cursor != '\0' && isspace_c(*cursor)) {
        cursor++;
    }

    // A line shorter than the pattern fails on its terminator, so the walk
    // never reads past the end of `line`.
    for (const char *p = pattern; *p != '\0'; p++, cursor++) {
        if (*cursor != *p) {
            return 0;
        }
    }
    return 1;
}
134
// Return the heading level (1-6) of `line`: the number of '#' characters that
// follow any leading whitespace, provided they are followed by a space.
// Returns 0 when the line is not a heading, including when it has more than
// six '#'.
static int count_heading_level(const char *line)
{
    while (*line && isspace_c(*line)) {
        line++;
    }

    int hashes = 0;
    for (; hashes < 6; hashes++) {
        if (line[hashes] != '#') {
            break;
        }
    }

    if (hashes == 0) {
        return 0;
    }
    return line[hashes] == ' ' ? hashes : 0;
}
144
// Return a pointer to the first character of `str` that is not whitespace
// (possibly its terminating NUL).
static const char *skip_whitespace(const char *str)
{
    const char *p;

    for (p = str; *p != '\0' && isspace_c(*p); p++) {
        /* advance only */
    }
    return p;
}
151
// Return 1 if `line` is empty or holds nothing but whitespace, else 0.
static int is_empty_line(const char *line)
{
    // The line is blank exactly when skipping whitespace reaches the NUL.
    const char *first_visible = skip_whitespace(line);

    return *first_visible == '\0';
}
161
// Return 1 if `line` is a horizontal rule: after leading whitespace it
// consists only of one repeated marker ('-', '*' or '_') and whitespace, with
// the marker occurring at least three times. Otherwise return 0.
static int is_horizontal_rule(const char *line)
{
    const char *p = skip_whitespace(line);
    const char marker = *p;

    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int marks = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            marks++;
            continue;
        }
        if (!isspace_c(*p)) {
            return 0; // any other visible character disqualifies the line
        }
    }
    return marks >= 3;
}
177
// Return 1 if `line`, after leading whitespace, starts with a bullet marker
// ('-', '*' or '+') immediately followed by a space; otherwise 0.
static int is_unordered_list(const char *line)
{
    const char *p = skip_whitespace(line);

    switch (*p) {
    case '-':
    case '*':
    case '+':
        return p[1] == ' ';
    default:
        return 0;
    }
}
184
// Return 1 if `line`, after leading whitespace, starts with one or more
// digits followed by ". " (for example "12. item"); otherwise 0.
static int is_ordered_list(const char *line)
{
    line = skip_whitespace(line);

    // At least one digit is required; without this check a line such as
    // ". text" would be mistaken for a list item.
    if (!isdigit_c(*line)) return 0;

    while (isdigit_c(*line)) line++;
    return *line == '.' && line[1] == ' ';
}
192
193 // Process inline markdown
194 static void process_inline(StringBuffer *buf, const char *text, size_t len)
195 {
196 size_t i = 0;
197
198 while (i < len) {
199 // Links: [text](url)
200 if (text[i] == '[') {
201 size_t link_start = i + 1;
202 size_t link_end = link_start;
203 while (link_end < len && text[link_end] != ']') link_end++;
204
205 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
206 size_t url_start = link_end + 2;
207 size_t url_end = url_start;
208 while (url_end < len && text[url_end] != ')') url_end++;
209
210 if (url_end < len) {
211 buffer_append(buf, "<a href=\"");
212 buffer_append_n(buf, text + url_start, url_end - url_start);
213 buffer_append(buf, "\">");
214 buffer_append_n(buf, text + link_start, link_end - link_start);
215 buffer_append(buf, "</a>");
216 i = url_end + 1;
217 continue;
218 }
219 }
220 }
221
222 // Images: ![alt](url)
223 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
224 size_t alt_start = i + 2;
225 size_t alt_end = alt_start;
226 while (alt_end < len && text[alt_end] != ']') alt_end++;
227
228 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
229 size_t url_start = alt_end + 2;
230 size_t url_end = url_start;
231 while (url_end < len && text[url_end] != ')') url_end++;
232
233 if (url_end < len) {
234 buffer_append(buf, "<img src=\"");
235 buffer_append_n(buf, text + url_start, url_end - url_start);
236 buffer_append(buf, "\" alt=\"");
237 buffer_append_n(buf, text + alt_start, alt_end - alt_start);
238 buffer_append(buf, "\">");
239 i = url_end + 1;
240 continue;
241 }
242 }
243 }
244
245 // Bold: **text** or __text__
246 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
247 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
248 char marker = text[i];
249 size_t start = i + 2;
250 size_t end = start;
251 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
252
253 if (end + 1 < len) {
254 buffer_append(buf, "<strong>");
255 process_inline(buf, text + start, end - start);
256 buffer_append(buf, "</strong>");
257 i = end + 2;
258 continue;
259 }
260 }
261
262 // Strikethrough: ~~text~~
263 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
264 size_t start = i + 2;
265 size_t end = start;
266 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
267
268 if (end + 1 < len) {
269 buffer_append(buf, "<del>");
270 process_inline(buf, text + start, end - start);
271 buffer_append(buf, "</del>");
272 i = end + 2;
273 continue;
274 }
275 }
276
277 // Italic: *text* or _text_
278 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace_c(text[i + 1])) {
279 char marker = text[i];
280 size_t start = i + 1;
281 size_t end = start;
282 while (end < len && text[end] != marker) end++;
283
284 if (end < len && end > start) {
285 buffer_append(buf, "<em>");
286 process_inline(buf, text + start, end - start);
287 buffer_append(buf, "</em>");
288 i = end + 1;
289 continue;
290 }
291 }
292
293 // Inline code: `code`
294 if (text[i] == '`') {
295 size_t start = i + 1;
296 size_t end = start;
297 while (end < len && text[end] != '`') end++;
298
299 if (end < len) {
300 buffer_append(buf, "<code>");
301 buffer_append_n(buf, text + start, end - start);
302 buffer_append(buf, "</code>");
303 i = end + 1;
304 continue;
305 }
306 }
307
308 // HTML escape
309 if (text[i] == '<') {
310 buffer_append(buf, "&lt;");
311 } else if (text[i] == '>') {
312 buffer_append(buf, "&gt;");
313 } else if (text[i] == '&') {
314 buffer_append(buf, "&amp;");
315 } else {
316 buffer_append_char(buf, text[i]);
317 }
318 i++;
319 }
320 }
321
322 // Append heading tag
323 static void append_heading_tag(StringBuffer *buf, int level, int closing)
324 {
325 buffer_append_char(buf, '<');
326 if (closing) buffer_append_char(buf, '/');
327 buffer_append_char(buf, 'h');
328 buffer_append_char(buf, '0' + level);
329 buffer_append_char(buf, '>');
330 }
331
332 // Convert markdown to HTML
333 WASM_EXPORT char *markdown_to_html(const char *markdown)
334 {
335 if (!markdown) return 0;
336
337 StringBuffer *buf = buffer_create(4096);
338 if (!buf) return 0;
339
340 const char *ptr = markdown;
341 const char *line_start;
342
343 while (*ptr) {
344 line_start = ptr;
345
346 // Find end of line
347 while (*ptr && *ptr != '\n') ptr++;
348 size_t line_len = ptr - line_start;
349
350 // Create line copy
351 char *line = (char *)malloc(line_len + 1);
352 if (!line) return buf->data;
353 memcpy(line, line_start, line_len);
354 line[line_len] = '\0';
355
356 // Skip empty lines
357 if (is_empty_line(line)) {
358 if (*ptr == '\n') ptr++;
359 continue;
360 }
361
362 // Headings
363 int heading_level = count_heading_level(line);
364 if (heading_level > 0) {
365 const char *content = skip_whitespace(line);
366 while (*content == '#') content++;
367 content = skip_whitespace(content);
368
369 append_heading_tag(buf, heading_level, 0);
370 process_inline(buf, content, strlen(content));
371 append_heading_tag(buf, heading_level, 1);
372
373 if (*ptr == '\n') ptr++;
374 continue;
375 }
376
377 // Code block
378 if (starts_with(line, "```")) {
379 buffer_append(buf, "<pre><code>");
380 if (*ptr == '\n') ptr++;
381
382 while (*ptr) {
383 line_start = ptr;
384 while (*ptr && *ptr != '\n') ptr++;
385 line_len = ptr - line_start;
386
387 char *code_line = (char *)malloc(line_len + 1);
388 if (!code_line) break;
389 memcpy(code_line, line_start, line_len);
390 code_line[line_len] = '\0';
391
392 if (starts_with(code_line, "```")) {
393 if (*ptr == '\n') ptr++;
394 break;
395 }
396
397 for (size_t i = 0; i < line_len; i++) {
398 if (code_line[i] == '<') buffer_append(buf, "&lt;");
399 else if (code_line[i] == '>') buffer_append(buf, "&gt;");
400 else if (code_line[i] == '&') buffer_append(buf, "&amp;");
401 else buffer_append_char(buf, code_line[i]);
402 }
403 buffer_append_char(buf, '\n');
404
405 if (*ptr == '\n') ptr++;
406 }
407
408 buffer_append(buf, "</code></pre>");
409 continue;
410 }
411
412 // Blockquote
413 if (starts_with(line, ">")) {
414 buffer_append(buf, "<blockquote>");
415
416 while (1) {
417 const char *content = skip_whitespace(line);
418 if (*content == '>') content++;
419 content = skip_whitespace(content);
420 process_inline(buf, content, strlen(content));
421 buffer_append_char(buf, ' ');
422
423 if (*ptr == '\n') ptr++;
424 if (!*ptr) break;
425
426 line_start = ptr;
427 while (*ptr && *ptr != '\n') ptr++;
428 line_len = ptr - line_start;
429
430 line = (char *)malloc(line_len + 1);
431 if (!line) break;
432 memcpy(line, line_start, line_len);
433 line[line_len] = '\0';
434
435 if (!starts_with(line, ">")) {
436 ptr = line_start;
437 break;
438 }
439 }
440
441 buffer_append(buf, "</blockquote>");
442 continue;
443 }
444
445 // Horizontal rule
446 if (is_horizontal_rule(line)) {
447 buffer_append(buf, "<hr>");
448 if (*ptr == '\n') ptr++;
449 continue;
450 }
451
452 // Unordered list
453 if (is_unordered_list(line)) {
454 buffer_append(buf, "<ul>");
455
456 while (1) {
457 const char *content = skip_whitespace(line);
458 content += 2;
459
460 buffer_append(buf, "<li>");
461 process_inline(buf, content, strlen(content));
462 buffer_append(buf, "</li>");
463
464 if (*ptr == '\n') ptr++;
465 if (!*ptr) break;
466
467 line_start = ptr;
468 while (*ptr && *ptr != '\n') ptr++;
469 line_len = ptr - line_start;
470
471 line = (char *)malloc(line_len + 1);
472 if (!line) break;
473 memcpy(line, line_start, line_len);
474 line[line_len] = '\0';
475
476 if (!is_unordered_list(line)) {
477 ptr = line_start;
478 break;
479 }
480 }
481
482 buffer_append(buf, "</ul>");
483 continue;
484 }
485
486 // Ordered list
487 if (is_ordered_list(line)) {
488 buffer_append(buf, "<ol>");
489
490 while (1) {
491 const char *content = skip_whitespace(line);
492 while (*content && isdigit_c(*content)) content++;
493 if (*content == '.') content++;
494 content = skip_whitespace(content);
495
496 buffer_append(buf, "<li>");
497 process_inline(buf, content, strlen(content));
498 buffer_append(buf, "</li>");
499
500 if (*ptr == '\n') ptr++;
501 if (!*ptr) break;
502
503 line_start = ptr;
504 while (*ptr && *ptr != '\n') ptr++;
505 line_len = ptr - line_start;
506
507 line = (char *)malloc(line_len + 1);
508 if (!line) break;
509 memcpy(line, line_start, line_len);
510 line[line_len] = '\0';
511
512 if (!is_ordered_list(line)) {
513 ptr = line_start;
514 break;
515 }
516 }
517
518 buffer_append(buf, "</ol>");
519 continue;
520 }
521
522 // Paragraph
523 buffer_append(buf, "<p>");
524
525 while (1) {
526 const char *content = skip_whitespace(line);
527 process_inline(buf, content, strlen(content));
528
529 if (*ptr == '\n') ptr++;
530 if (!*ptr) break;
531
532 line_start = ptr;
533 while (*ptr && *ptr != '\n') ptr++;
534 line_len = ptr - line_start;
535
536 line = (char *)malloc(line_len + 1);
537 if (!line) break;
538 memcpy(line, line_start, line_len);
539 line[line_len] = '\0';
540
541 if (is_empty_line(line) ||
542 count_heading_level(line) > 0 ||
543 starts_with(line, "```") ||
544 starts_with(line, ">") ||
545 is_horizontal_rule(line) ||
546 is_unordered_list(line) ||
547 is_ordered_list(line)) {
548 ptr = line_start;
549 break;
550 }
551
552 buffer_append_char(buf, ' ');
553 }
554
555 buffer_append(buf, "</p>");
556 }
557
558 return buf->data;
559 }
560
561 // Get string length (for JS interop)
562 WASM_EXPORT size_t markdown_strlen(const char *str)
563 {
564 return str ? strlen(str) : 0;
565 }