comparison markdown_converter/markdown_to_html.c @ 154:bdcc610eeed8

[Markdown Converter][GuiZe] Added markdown converter in C and wasm rule sets. Needs further review, as I haven't looked at it yet. Written by Claude.
author June Park <parkjune1995@gmail.com>
date Mon, 12 Jan 2026 09:11:58 -0800
parents
children cd35e600ae34
comparison
equal deleted inserted replaced
153:790930d9bb90 154:bdcc610eeed8
/**
 * Markdown to HTML Converter - C Implementation
 * Supports: headings, bold, italic, strikethrough, inline code, links, images,
 * lists, code blocks, blockquotes, horizontal rules
 */
5
6 #include "markdown_to_html.h"
7 #include <string.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <ctype.h>
11
#define INITIAL_BUFFER_SIZE 4096

/**
 * Growable character buffer used to build the HTML output.
 *
 * Invariants maintained by every function below:
 *   - data[length] == '\0' (the contents are always a valid C string)
 *   - length < capacity
 */
typedef struct {
    char *data;      /* heap storage owned by the buffer */
    size_t length;   /* bytes in use, not counting the terminating NUL */
    size_t capacity; /* bytes allocated for data */
    int failed;      /* non-zero once an append was dropped for lack of memory */
} StringBuffer;

/**
 * Allocate an empty buffer.
 *
 * @param initial_capacity  Bytes to reserve up front; 0 is treated as 1 so
 *                          there is always room for the terminating NUL.
 * @return A new buffer, or NULL if allocation fails. Release with buffer_free().
 */
static StringBuffer *buffer_create(size_t initial_capacity)
{
    StringBuffer *buf = malloc(sizeof *buf);
    if (!buf) return NULL;

    if (initial_capacity == 0) initial_capacity = 1;

    buf->data = malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return NULL;
    }
    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    buf->failed = 0;
    return buf;
}

/**
 * Make sure `needed` more bytes plus a terminating NUL fit in the buffer.
 *
 * Capacity grows by doubling. On failure the buffer contents are left
 * untouched and the sticky `failed` flag is set; after that every further
 * request is refused, so the buffer always holds a clean prefix of what was
 * appended rather than text with holes in it.
 *
 * @return 1 if the space is available, 0 otherwise.
 */
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    if (buf->failed) return 0;

    /* length + needed + 1 must not wrap around. */
    if (needed >= size_max - buf->length) {
        buf->failed = 1;
        return 0;
    }

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    /* Start from at least 1: doubling 0 would never reach `required`. */
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            new_capacity = required; /* doubling would overflow; take the exact size */
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = realloc(buf->data, new_capacity);
    if (!new_data) {
        buf->failed = 1; /* buf->data is still valid and unchanged */
        return 0;
    }
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

/** Append the NUL-terminated string `str`; dropped if memory cannot be obtained. */
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    memcpy(buf->data + buf->length, str, len + 1); /* +1 copies the NUL too */
    buf->length += len;
}

/** Append exactly `n` bytes starting at `str`; dropped if memory cannot be obtained. */
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

/** Append the single character `c`; dropped if memory cannot be obtained. */
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

/** Release the buffer and its storage. Passing NULL is allowed. */
static void buffer_free(StringBuffer *buf)
{
    if (buf) {
        free(buf->data);
        free(buf);
    }
}
82
/**
 * Test whether `line`, ignoring leading whitespace, begins with `pattern`.
 *
 * @return non-zero when the first non-whitespace characters of `line`
 *         equal `pattern`, 0 otherwise.
 */
static int starts_with(const char *line, const char *pattern)
{
    size_t skip = 0;
    while (line[skip] != '\0' && isspace((unsigned char)line[skip])) {
        skip++;
    }

    const char *trimmed = line + skip;
    size_t pattern_len = strlen(pattern);
    return strncmp(trimmed, pattern, pattern_len) == 0;
}
89
/**
 * Determine the heading level of a line.
 *
 * After leading whitespace, a heading is a run of one to six '#'
 * characters immediately followed by a single space.
 *
 * @return the number of '#' characters (1-6), or 0 if the line is not a heading.
 */
static int count_heading_level(const char *line)
{
    const char *p = line;
    while (*p != '\0' && isspace((unsigned char)*p)) {
        p++;
    }

    int level = 0;
    while (level < 6 && p[level] == '#') {
        level++;
    }

    if (level == 0) {
        return 0;
    }
    /* A seventh '#', or anything other than a space, disqualifies the line. */
    return p[level] == ' ' ? level : 0;
}
99
/**
 * Return a pointer to the first character of `str` that is not whitespace
 * (as defined by isspace), or to the terminating NUL if there is none.
 */
static const char *skip_whitespace(const char *str)
{
    for (;;) {
        unsigned char c = (unsigned char)*str;
        if (c == '\0' || !isspace(c)) {
            return str;
        }
        str++;
    }
}
106
/**
 * Report whether a line is blank.
 *
 * @return 1 if `line` is empty or consists solely of whitespace, 0 otherwise.
 */
static int is_empty_line(const char *line)
{
    const char *p = line;

    /* Walk past every whitespace character; a blank line ends at the NUL. */
    while (*p != '\0' && isspace((unsigned char)*p)) {
        p++;
    }
    return *p == '\0' ? 1 : 0;
}
116
/**
 * Report whether a line is a horizontal rule.
 *
 * A rule is made of at least three copies of one marker character
 * ('-', '*' or '_'), optionally separated or surrounded by whitespace,
 * with nothing else on the line.
 *
 * @return non-zero for a horizontal rule, 0 otherwise.
 */
static int is_horizontal_rule(const char *line)
{
    const char *p = line;
    while (*p != '\0' && isspace((unsigned char)*p)) {
        p++;
    }

    const char marker = *p;
    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            markers++;
            continue;
        }
        if (!isspace((unsigned char)*p)) {
            return 0; /* any other visible character spoils the rule */
        }
    }
    return markers >= 3;
}
132
/**
 * Report whether a line is an unordered list item: after leading
 * whitespace, one of '-', '*' or '+' followed by a single space.
 *
 * @return non-zero for a list item, 0 otherwise.
 */
static int is_unordered_list(const char *line)
{
    const char *p = line;
    while (*p != '\0' && isspace((unsigned char)*p)) {
        p++;
    }

    switch (p[0]) {
    case '-':
    case '*':
    case '+':
        return p[1] == ' ';
    default:
        return 0;
    }
}
139
/**
 * Report whether a line is an ordered list item: after leading whitespace,
 * one or more decimal digits followed by ". " (period, space).
 *
 * At least one digit is required, so a line such as ". note" is ordinary
 * text rather than a list item.
 *
 * @return non-zero for a list item, 0 otherwise.
 */
static int is_ordered_list(const char *line)
{
    while (*line && isspace((unsigned char)*line)) line++;

    const char *digits = line;
    while (isdigit((unsigned char)*line)) line++;
    if (line == digits) return 0; /* no number before the '.' */

    return line[0] == '.' && line[1] == ' ';
}
147
148 // Process inline markdown (bold, italic, code, links, strikethrough)
149 static void process_inline(StringBuffer *buf, const char *text, size_t len)
150 {
151 size_t i = 0;
152
153 while (i < len) {
154 // Links: [text](url)
155 if (text[i] == '[') {
156 size_t link_start = i + 1;
157 size_t link_end = link_start;
158 while (link_end < len && text[link_end] != ']') link_end++;
159
160 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
161 size_t url_start = link_end + 2;
162 size_t url_end = url_start;
163 while (url_end < len && text[url_end] != ')') url_end++;
164
165 if (url_end < len) {
166 buffer_append(buf, "<a href=\"");
167 buffer_append_n(buf, text + url_start, url_end - url_start);
168 buffer_append(buf, "\">");
169 buffer_append_n(buf, text + link_start, link_end - link_start);
170 buffer_append(buf, "</a>");
171 i = url_end + 1;
172 continue;
173 }
174 }
175 }
176
177 // Images: ![alt](url)
178 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
179 size_t alt_start = i + 2;
180 size_t alt_end = alt_start;
181 while (alt_end < len && text[alt_end] != ']') alt_end++;
182
183 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
184 size_t url_start = alt_end + 2;
185 size_t url_end = url_start;
186 while (url_end < len && text[url_end] != ')') url_end++;
187
188 if (url_end < len) {
189 buffer_append(buf, "<img src=\"");
190 buffer_append_n(buf, text + url_start, url_end - url_start);
191 buffer_append(buf, "\" alt=\"");
192 buffer_append_n(buf, text + alt_start, alt_end - alt_start);
193 buffer_append(buf, "\">");
194 i = url_end + 1;
195 continue;
196 }
197 }
198 }
199
200 // Bold: **text** or __text__
201 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
202 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
203 char marker = text[i];
204 size_t start = i + 2;
205 size_t end = start;
206 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
207
208 if (end + 1 < len) {
209 buffer_append(buf, "<strong>");
210 process_inline(buf, text + start, end - start);
211 buffer_append(buf, "</strong>");
212 i = end + 2;
213 continue;
214 }
215 }
216
217 // Strikethrough: ~~text~~
218 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
219 size_t start = i + 2;
220 size_t end = start;
221 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
222
223 if (end + 1 < len) {
224 buffer_append(buf, "<del>");
225 process_inline(buf, text + start, end - start);
226 buffer_append(buf, "</del>");
227 i = end + 2;
228 continue;
229 }
230 }
231
232 // Italic: *text* or _text_
233 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) {
234 char marker = text[i];
235 size_t start = i + 1;
236 size_t end = start;
237 while (end < len && text[end] != marker) end++;
238
239 if (end < len && end > start) {
240 buffer_append(buf, "<em>");
241 process_inline(buf, text + start, end - start);
242 buffer_append(buf, "</em>");
243 i = end + 1;
244 continue;
245 }
246 }
247
248 // Inline code: `code`
249 if (text[i] == '`') {
250 size_t start = i + 1;
251 size_t end = start;
252 while (end < len && text[end] != '`') end++;
253
254 if (end < len) {
255 buffer_append(buf, "<code>");
256 buffer_append_n(buf, text + start, end - start);
257 buffer_append(buf, "</code>");
258 i = end + 1;
259 continue;
260 }
261 }
262
263 // HTML escape special characters
264 if (text[i] == '<') {
265 buffer_append(buf, "&lt;");
266 } else if (text[i] == '>') {
267 buffer_append(buf, "&gt;");
268 } else if (text[i] == '&') {
269 buffer_append(buf, "&amp;");
270 } else {
271 buffer_append_char(buf, text[i]);
272 }
273 i++;
274 }
275 }
276
277 // Convert markdown to HTML
278 MDAPI char *markdown_to_html(const char *markdown)
279 {
280 if (!markdown) return NULL;
281
282 StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE);
283 if (!buf) return NULL;
284
285 const char *ptr = markdown;
286 const char *line_start;
287
288 while (*ptr) {
289 line_start = ptr;
290
291 // Find end of line
292 while (*ptr && *ptr != '\n') ptr++;
293 size_t line_len = ptr - line_start;
294
295 // Create null-terminated line copy
296 char *line = (char *)malloc(line_len + 1);
297 if (!line) {
298 buffer_free(buf);
299 return NULL;
300 }
301 memcpy(line, line_start, line_len);
302 line[line_len] = '\0';
303
304 // Skip empty lines
305 if (is_empty_line(line)) {
306 free(line);
307 if (*ptr == '\n') ptr++;
308 continue;
309 }
310
311 // Headings: # H1, ## H2, etc.
312 int heading_level = count_heading_level(line);
313 if (heading_level > 0) {
314 const char *content = skip_whitespace(line);
315 while (*content == '#') content++;
316 content = skip_whitespace(content);
317
318 char tag[8];
319 snprintf(tag, sizeof(tag), "<h%d>", heading_level);
320 buffer_append(buf, tag);
321 process_inline(buf, content, strlen(content));
322 snprintf(tag, sizeof(tag), "</h%d>", heading_level);
323 buffer_append(buf, tag);
324
325 free(line);
326 if (*ptr == '\n') ptr++;
327 continue;
328 }
329
330 // Code block: ```
331 if (starts_with(line, "```")) {
332 buffer_append(buf, "<pre><code>");
333 free(line);
334 if (*ptr == '\n') ptr++;
335
336 // Collect code content
337 while (*ptr) {
338 line_start = ptr;
339 while (*ptr && *ptr != '\n') ptr++;
340 line_len = ptr - line_start;
341
342 line = (char *)malloc(line_len + 1);
343 if (!line) break;
344 memcpy(line, line_start, line_len);
345 line[line_len] = '\0';
346
347 if (starts_with(line, "```")) {
348 free(line);
349 if (*ptr == '\n') ptr++;
350 break;
351 }
352
353 // Escape HTML in code blocks
354 for (size_t i = 0; i < line_len; i++) {
355 if (line[i] == '<') buffer_append(buf, "&lt;");
356 else if (line[i] == '>') buffer_append(buf, "&gt;");
357 else if (line[i] == '&') buffer_append(buf, "&amp;");
358 else buffer_append_char(buf, line[i]);
359 }
360 buffer_append_char(buf, '\n');
361
362 free(line);
363 if (*ptr == '\n') ptr++;
364 }
365
366 buffer_append(buf, "</code></pre>");
367 continue;
368 }
369
370 // Blockquote: >
371 if (starts_with(line, ">")) {
372 buffer_append(buf, "<blockquote>");
373
374 while (1) {
375 const char *content = skip_whitespace(line);
376 if (*content == '>') content++;
377 content = skip_whitespace(content);
378 process_inline(buf, content, strlen(content));
379 buffer_append_char(buf, ' ');
380
381 free(line);
382 if (*ptr == '\n') ptr++;
383
384 // Check next line
385 if (!*ptr) break;
386 line_start = ptr;
387 while (*ptr && *ptr != '\n') ptr++;
388 line_len = ptr - line_start;
389
390 line = (char *)malloc(line_len + 1);
391 if (!line) break;
392 memcpy(line, line_start, line_len);
393 line[line_len] = '\0';
394
395 if (!starts_with(line, ">")) {
396 // Put back the line pointer
397 ptr = line_start;
398 free(line);
399 break;
400 }
401 }
402
403 buffer_append(buf, "</blockquote>");
404 continue;
405 }
406
407 // Horizontal rule
408 if (is_horizontal_rule(line)) {
409 buffer_append(buf, "<hr>");
410 free(line);
411 if (*ptr == '\n') ptr++;
412 continue;
413 }
414
415 // Unordered list
416 if (is_unordered_list(line)) {
417 buffer_append(buf, "<ul>");
418
419 while (1) {
420 const char *content = skip_whitespace(line);
421 content += 2; // Skip "- " or "* " or "+ "
422
423 buffer_append(buf, "<li>");
424 process_inline(buf, content, strlen(content));
425 buffer_append(buf, "</li>");
426
427 free(line);
428 if (*ptr == '\n') ptr++;
429
430 // Check next line
431 if (!*ptr) break;
432 line_start = ptr;
433 while (*ptr && *ptr != '\n') ptr++;
434 line_len = ptr - line_start;
435
436 line = (char *)malloc(line_len + 1);
437 if (!line) break;
438 memcpy(line, line_start, line_len);
439 line[line_len] = '\0';
440
441 if (!is_unordered_list(line)) {
442 ptr = line_start;
443 free(line);
444 break;
445 }
446 }
447
448 buffer_append(buf, "</ul>");
449 continue;
450 }
451
452 // Ordered list
453 if (is_ordered_list(line)) {
454 buffer_append(buf, "<ol>");
455
456 while (1) {
457 const char *content = skip_whitespace(line);
458 while (*content && isdigit((unsigned char)*content)) content++;
459 if (*content == '.') content++;
460 content = skip_whitespace(content);
461
462 buffer_append(buf, "<li>");
463 process_inline(buf, content, strlen(content));
464 buffer_append(buf, "</li>");
465
466 free(line);
467 if (*ptr == '\n') ptr++;
468
469 // Check next line
470 if (!*ptr) break;
471 line_start = ptr;
472 while (*ptr && *ptr != '\n') ptr++;
473 line_len = ptr - line_start;
474
475 line = (char *)malloc(line_len + 1);
476 if (!line) break;
477 memcpy(line, line_start, line_len);
478 line[line_len] = '\0';
479
480 if (!is_ordered_list(line)) {
481 ptr = line_start;
482 free(line);
483 break;
484 }
485 }
486
487 buffer_append(buf, "</ol>");
488 continue;
489 }
490
491 // Regular paragraph
492 buffer_append(buf, "<p>");
493
494 while (1) {
495 const char *content = skip_whitespace(line);
496 process_inline(buf, content, strlen(content));
497
498 free(line);
499 if (*ptr == '\n') ptr++;
500
501 // Check next line - continue paragraph if not special
502 if (!*ptr) break;
503 line_start = ptr;
504 while (*ptr && *ptr != '\n') ptr++;
505 line_len = ptr - line_start;
506
507 line = (char *)malloc(line_len + 1);
508 if (!line) break;
509 memcpy(line, line_start, line_len);
510 line[line_len] = '\0';
511
512 if (is_empty_line(line) ||
513 count_heading_level(line) > 0 ||
514 starts_with(line, "```") ||
515 starts_with(line, ">") ||
516 is_horizontal_rule(line) ||
517 is_unordered_list(line) ||
518 is_ordered_list(line)) {
519 ptr = line_start;
520 free(line);
521 break;
522 }
523
524 buffer_append_char(buf, ' ');
525 }
526
527 buffer_append(buf, "</p>");
528 }
529
530 char *result = buf->data;
531 free(buf); // Free struct but not data
532 return result;
533 }
534
535 // Free the returned HTML string
536 MDAPI void markdown_free(char *html)
537 {
538 free(html);
539 }
540
541 // Get length of HTML string (for WASM memory allocation)
542 MDAPI size_t markdown_get_length(const char *html)
543 {
544 return html ? strlen(html) : 0;
545 }