comparison markdown_converter/markdown_to_html.c @ 173:827c6ac504cd hg-web

Merged in default here.
author MrJuneJune <me@mrjunejune.com>
date Mon, 19 Jan 2026 18:59:10 -0800
parents 1c0878eb17de
children 8c74204fd362
comparison
equal deleted inserted replaced
151:c033667da5f9 173:827c6ac504cd
1 #include <string.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <ctype.h>
5 #include "markdown_converter/markdown_to_html.h"
6
// Initial capacity of the HTML output buffer: 1 MiB. The expansion is fully
// parenthesized and typed as size_t so it behaves as a single value inside any
// surrounding expression (e.g. `x % INITIAL_BUFFER_SIZE`).
#define INITIAL_BUFFER_SIZE ((size_t)1024 * 1024)
8
// Growable byte buffer used to assemble the HTML output.
// Invariant kept by every function below: data[length] == '\0'.
typedef struct {
    char *data;      // heap storage holding the text plus its terminator
    size_t length;   // bytes in use, not counting the terminator
    size_t capacity; // bytes allocated for data
} StringBuffer;

// Allocate an empty buffer with room for at least `initial_capacity` bytes.
// A request for 0 bytes is rounded up to 1 so the terminator always fits and
// so capacity doubling in buffer_grow can make progress.
// Returns NULL if either allocation fails.
static StringBuffer *buffer_create(size_t initial_capacity)
{
    if (initial_capacity == 0) {
        initial_capacity = 1;
    }

    StringBuffer *buf = malloc(sizeof *buf);
    if (!buf) {
        return NULL;
    }

    buf->data = malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return NULL;
    }
    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

// Make sure `needed` more bytes plus the terminator fit in the buffer,
// doubling the capacity until they do.
// Returns 1 on success. Returns 0 if the size would overflow size_t or if
// realloc fails; in that case the buffer is left exactly as it was.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t max_size = (size_t)-1;

    // length + needed + 1 must not wrap around.
    if (needed >= max_size - buf->length) {
        return 0;
    }

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) {
        return 1;
    }

    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > max_size / 2) {
            // Doubling would overflow; settle for the exact size.
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = realloc(buf->data, new_capacity);
    if (!new_data) {
        return 0; // buf->data is still valid and unchanged
    }
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append the NUL-terminated string `str`.
// If the buffer cannot grow, the append is dropped and the buffer keeps its
// previous, still NUL-terminated contents.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) {
        return;
    }
    memcpy(buf->data + buf->length, str, len + 1); // copies the terminator too
    buf->length += len;
}

// Append exactly `n` bytes starting at `str` and re-terminate the buffer.
// If the buffer cannot grow, the append is dropped.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) {
        return;
    }
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append the single character `c` and re-terminate the buffer.
// If the buffer cannot grow, the append is dropped.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) {
        return;
    }
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

// Release the buffer and its storage. Passing NULL is allowed.
static void buffer_free(StringBuffer *buf)
{
    if (buf) {
        free(buf->data);
        free(buf);
    }
}
77
// Report whether `line`, once its leading whitespace is skipped, begins with
// `pattern`. Returns 1 on a match and 0 otherwise.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;
    for (; *cursor != '\0'; cursor++) {
        if (!isspace((unsigned char)*cursor)) {
            break;
        }
    }

    size_t pattern_len = strlen(pattern);
    if (strncmp(cursor, pattern, pattern_len) != 0) {
        return 0;
    }
    return 1;
}
84
// Return the heading level (1-6) of `line`, or 0 if it is not a heading.
// A heading is optional leading whitespace, one to six '#' characters, and
// then a single space; a seventh '#' therefore disqualifies the line.
static int count_heading_level(const char *line)
{
    const char *cursor = line;
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }

    int hashes = 0;
    while (hashes < 6 && cursor[hashes] == '#') {
        hashes++;
    }

    if (hashes == 0 || cursor[hashes] != ' ') {
        return 0;
    }
    return hashes;
}
94
// Return a pointer to the first non-whitespace character of `str`, or to its
// terminating NUL when the string is all whitespace.
static const char *skip_whitespace(const char *str)
{
    const char *cursor;
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (!isspace((unsigned char)*cursor)) {
            break;
        }
    }
    return cursor;
}
101
// Return 1 when `line` holds nothing but whitespace (or nothing at all),
// 0 as soon as any other character is present.
static int is_empty_line(const char *line)
{
    const char *cursor = line;
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }
    // Only whitespace was seen if the scan stopped at the terminator.
    return *cursor == '\0';
}
111
// Return 1 when `line` is a horizontal rule: after optional leading
// whitespace it contains only one kind of marker ('-', '*' or '_'), at least
// three times, with nothing but whitespace in between. Returns 0 otherwise.
static int is_horizontal_rule(const char *line)
{
    const char *cursor = line;
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }

    const char marker = *cursor;
    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *cursor != '\0'; cursor++) {
        if (*cursor == marker) {
            markers++;
            continue;
        }
        if (!isspace((unsigned char)*cursor)) {
            return 0; // anything besides the marker or whitespace disqualifies
        }
    }
    return markers >= 3;
}
127
// Return 1 when `line`, after optional leading whitespace, starts with a
// bullet marker ('-', '*' or '+') immediately followed by a space.
static int is_unordered_list(const char *line)
{
    const char *cursor = line;
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }

    if (*cursor != '-' && *cursor != '*' && *cursor != '+') {
        return 0;
    }
    return cursor[1] == ' ';
}
134
// Return 1 when `line` is an ordered list item: optional leading whitespace,
// one or more decimal digits, a '.', and then a space. Returns 0 otherwise.
// At least one digit is required, so a line such as ". text" is not an item.
static int is_ordered_list(const char *line)
{
    const char *cursor = line;
    while (isspace((unsigned char)*cursor)) {
        cursor++;
    }

    if (!isdigit((unsigned char)*cursor)) {
        return 0;
    }
    while (isdigit((unsigned char)*cursor)) {
        cursor++;
    }

    return cursor[0] == '.' && cursor[1] == ' ';
}
142
143 // Process inline markdown (bold, italic, code, links, strikethrough)
144 static void process_inline(StringBuffer *buf, const char *text, size_t len)
145 {
146 size_t i = 0;
147
148 while (i < len) {
149 // Links: [text](url)
150 if (text[i] == '[') {
151 size_t link_start = i + 1;
152 size_t link_end = link_start;
153 while (link_end < len && text[link_end] != ']') link_end++;
154
155 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') {
156 size_t url_start = link_end + 2;
157 size_t url_end = url_start;
158 while (url_end < len && text[url_end] != ')') url_end++;
159
160 if (url_end < len) {
161 buffer_append(buf, "<a href=\"");
162 buffer_append_n(buf, text + url_start, url_end - url_start);
163 buffer_append(buf, "\">");
164 buffer_append_n(buf, text + link_start, link_end - link_start);
165 buffer_append(buf, "</a>");
166 i = url_end + 1;
167 continue;
168 }
169 }
170 }
171
172 // Images: ![alt](url)
173 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') {
174 size_t alt_start = i + 2;
175 size_t alt_end = alt_start;
176 while (alt_end < len && text[alt_end] != ']') alt_end++;
177
178 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') {
179 size_t url_start = alt_end + 2;
180 size_t url_end = url_start;
181 while (url_end < len && text[url_end] != ')') url_end++;
182
183 if (url_end < len) {
184 buffer_append(buf, "<img src=\"");
185 buffer_append_n(buf, text + url_start, url_end - url_start);
186 buffer_append(buf, "\" alt=\"");
187 buffer_append_n(buf, text + alt_start, alt_end - alt_start);
188 buffer_append(buf, "\">");
189 i = url_end + 1;
190 continue;
191 }
192 }
193 }
194
195 // Bold: **text** or __text__
196 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') ||
197 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) {
198 char marker = text[i];
199 size_t start = i + 2;
200 size_t end = start;
201 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++;
202
203 if (end + 1 < len) {
204 buffer_append(buf, "<strong>");
205 process_inline(buf, text + start, end - start);
206 buffer_append(buf, "</strong>");
207 i = end + 2;
208 continue;
209 }
210 }
211
212 // Strikethrough: ~~text~~
213 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') {
214 size_t start = i + 2;
215 size_t end = start;
216 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++;
217
218 if (end + 1 < len) {
219 buffer_append(buf, "<del>");
220 process_inline(buf, text + start, end - start);
221 buffer_append(buf, "</del>");
222 i = end + 2;
223 continue;
224 }
225 }
226
227 // Italic: *text* or _text_
228 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) {
229 char marker = text[i];
230 size_t start = i + 1;
231 size_t end = start;
232 while (end < len && text[end] != marker) end++;
233
234 if (end < len && end > start) {
235 buffer_append(buf, "<em>");
236 process_inline(buf, text + start, end - start);
237 buffer_append(buf, "</em>");
238 i = end + 1;
239 continue;
240 }
241 }
242
243 // Inline code: `code`
244 if (text[i] == '`') {
245 size_t start = i + 1;
246 size_t end = start;
247 while (end < len && text[end] != '`') end++;
248
249 if (end < len) {
250 buffer_append(buf, "<code>");
251 buffer_append_n(buf, text + start, end - start);
252 buffer_append(buf, "</code>");
253 i = end + 1;
254 continue;
255 }
256 }
257
258 // This might not be needed for now.
259 // HTML escape special characters
260 // if (text[i] == '<') {
261 // buffer_append(buf, "&lt;");
262 // } else if (text[i] == '>') {
263 // buffer_append(buf, "&gt;");
264 // } else if (text[i] == '&') {
265 // buffer_append(buf, "&amp;");
266 // } else {
267 // buffer_append_char(buf, text[i]);
268 // }
269 buffer_append_char(buf, text[i]);
270 i++;
271 }
272 }
273
274 // Convert markdown to HTML
275 MDAPI char *markdown_to_html(const char *markdown)
276 {
277 if (!markdown) return NULL;
278
279 StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE);
280 if (!buf) return NULL;
281
282 const char *ptr = markdown;
283 const char *line_start;
284
285 while (*ptr) {
286 line_start = ptr;
287
288 // Find end of line
289 while (*ptr && *ptr != '\n') ptr++;
290 size_t line_len = ptr - line_start;
291
292 // Create null-terminated line copy
293 char *line = (char *)malloc(line_len + 1);
294 if (!line) {
295 buffer_free(buf);
296 return NULL;
297 }
298 memcpy(line, line_start, line_len);
299 line[line_len] = '\0';
300
301 // Skip empty lines
302 if (is_empty_line(line)) {
303 free(line);
304 if (*ptr == '\n') ptr++;
305 continue;
306 }
307
308 // Headings: # H1, ## H2, etc.
309 int heading_level = count_heading_level(line);
310 if (heading_level > 0) {
311 const char *content = skip_whitespace(line);
312 while (*content == '#') content++;
313 content = skip_whitespace(content);
314
315 char tag[8];
316 snprintf(tag, sizeof(tag), "<h%d>", heading_level);
317 buffer_append(buf, tag);
318 process_inline(buf, content, strlen(content));
319 snprintf(tag, sizeof(tag), "</h%d>", heading_level);
320 buffer_append(buf, tag);
321
322 free(line);
323 if (*ptr == '\n') ptr++;
324 continue;
325 }
326
327 // Code block: ```
328 if (starts_with(line, "```")) {
329 buffer_append(buf, "<pre><code>");
330 free(line);
331 if (*ptr == '\n') ptr++;
332
333 // Collect code content
334 while (*ptr) {
335 line_start = ptr;
336 while (*ptr && *ptr != '\n') ptr++;
337 line_len = ptr - line_start;
338
339 line = (char *)malloc(line_len + 1);
340 if (!line) break;
341 memcpy(line, line_start, line_len);
342 line[line_len] = '\0';
343
344 if (starts_with(line, "```")) {
345 free(line);
346 if (*ptr == '\n') ptr++;
347 break;
348 }
349
350 // Escape HTML in code blocks
351 for (size_t i = 0; i < line_len; i++) {
352 if (line[i] == '<') buffer_append(buf, "&lt;");
353 else if (line[i] == '>') buffer_append(buf, "&gt;");
354 else if (line[i] == '&') buffer_append(buf, "&amp;");
355 else buffer_append_char(buf, line[i]);
356 }
357 buffer_append_char(buf, '\n');
358
359 free(line);
360 if (*ptr == '\n') ptr++;
361 }
362
363 buffer_append(buf, "</code></pre>");
364 continue;
365 }
366
367 // Blockquote: >
368 if (starts_with(line, ">")) {
369 buffer_append(buf, "<blockquote>");
370
371 while (1) {
372 const char *content = skip_whitespace(line);
373 if (*content == '>') content++;
374 content = skip_whitespace(content);
375 process_inline(buf, content, strlen(content));
376 buffer_append_char(buf, ' ');
377
378 free(line);
379 if (*ptr == '\n') ptr++;
380
381 // Check next line
382 if (!*ptr) break;
383 line_start = ptr;
384 while (*ptr && *ptr != '\n') ptr++;
385 line_len = ptr - line_start;
386
387 line = (char *)malloc(line_len + 1);
388 if (!line) break;
389 memcpy(line, line_start, line_len);
390 line[line_len] = '\0';
391
392 if (!starts_with(line, ">")) {
393 // Put back the line pointer
394 ptr = line_start;
395 free(line);
396 break;
397 }
398 }
399
400 buffer_append(buf, "</blockquote>");
401 continue;
402 }
403
404 // Horizontal rule
405 if (is_horizontal_rule(line)) {
406 buffer_append(buf, "<hr>");
407 free(line);
408 if (*ptr == '\n') ptr++;
409 continue;
410 }
411
412 // Unordered list
413 if (is_unordered_list(line)) {
414 buffer_append(buf, "<ul>");
415
416 while (1) {
417 const char *content = skip_whitespace(line);
418 content += 2; // Skip "- " or "* " or "+ "
419
420 buffer_append(buf, "<li>");
421 process_inline(buf, content, strlen(content));
422 buffer_append(buf, "</li>");
423
424 free(line);
425 if (*ptr == '\n') ptr++;
426
427 // Check next line
428 if (!*ptr) break;
429 line_start = ptr;
430 while (*ptr && *ptr != '\n') ptr++;
431 line_len = ptr - line_start;
432
433 line = (char *)malloc(line_len + 1);
434 if (!line) break;
435 memcpy(line, line_start, line_len);
436 line[line_len] = '\0';
437
438 if (!is_unordered_list(line)) {
439 ptr = line_start;
440 free(line);
441 break;
442 }
443 }
444
445 buffer_append(buf, "</ul>");
446 continue;
447 }
448
449 // Ordered list
450 if (is_ordered_list(line)) {
451 buffer_append(buf, "<ol>");
452
453 while (1) {
454 const char *content = skip_whitespace(line);
455 while (*content && isdigit((unsigned char)*content)) content++;
456 if (*content == '.') content++;
457 content = skip_whitespace(content);
458
459 buffer_append(buf, "<li>");
460 process_inline(buf, content, strlen(content));
461 buffer_append(buf, "</li>");
462
463 free(line);
464 if (*ptr == '\n') ptr++;
465
466 // Check next line
467 if (!*ptr) break;
468 line_start = ptr;
469 while (*ptr && *ptr != '\n') ptr++;
470 line_len = ptr - line_start;
471
472 line = (char *)malloc(line_len + 1);
473 if (!line) break;
474 memcpy(line, line_start, line_len);
475 line[line_len] = '\0';
476
477 if (!is_ordered_list(line)) {
478 ptr = line_start;
479 free(line);
480 break;
481 }
482 }
483
484 buffer_append(buf, "</ol>");
485 continue;
486 }
487
488 // Regular paragraph
489 buffer_append(buf, "<p>");
490
491 while (1) {
492 const char *content = skip_whitespace(line);
493 process_inline(buf, content, strlen(content));
494
495 free(line);
496 if (*ptr == '\n') ptr++;
497
498 // Check next line - continue paragraph if not special
499 if (!*ptr) break;
500 line_start = ptr;
501 while (*ptr && *ptr != '\n') ptr++;
502 line_len = ptr - line_start;
503
504 line = (char *)malloc(line_len + 1);
505 if (!line) break;
506 memcpy(line, line_start, line_len);
507 line[line_len] = '\0';
508
509 if (is_empty_line(line) ||
510 count_heading_level(line) > 0 ||
511 starts_with(line, "```") ||
512 starts_with(line, ">") ||
513 is_horizontal_rule(line) ||
514 is_unordered_list(line) ||
515 is_ordered_list(line)) {
516 ptr = line_start;
517 free(line);
518 break;
519 }
520
521 buffer_append_char(buf, ' ');
522 }
523
524 buffer_append(buf, "</p>");
525 }
526
527 char *result = buf->data;
528 free(buf); // Free struct but not data
529 return result;
530 }
531
532 // Free the returned HTML string
533 MDAPI void markdown_free(char *html)
534 {
535 free(html);
536 }
537
538 // Get length of HTML string (for WASM memory allocation)
539 MDAPI size_t markdown_get_length(const char *html)
540 {
541 return html ? strlen(html) : 0;
542 }