Mercurial
comparison markdown_converter/markdown_to_html.c @ 154:bdcc610eeed8
[Markdown Converter][GuiZe] Added markdown converter in C and wasm rule sets. Needs further view on this as I haven't taken a look. Written by Claude.
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Mon, 12 Jan 2026 09:11:58 -0800 |
| parents | |
| children | cd35e600ae34 |
comparison
equal
deleted
inserted
replaced
| 153:790930d9bb90 | 154:bdcc610eeed8 |
|---|---|
| 1 /** | |
| 2 * Markdown to HTML Converter - C Implementation | |
| 3 * Supports: headings, bold, italic, strikethrough, inline code, links, images, lists, code blocks, blockquotes, horizontal rules | |
| 4 */ | |
| 5 | |
| 6 #include "markdown_to_html.h" | |
| 7 #include <string.h> | |
| 8 #include <stdlib.h> | |
| 9 #include <stdio.h> | |
| 10 #include <ctype.h> | |
| 11 | |
#define INITIAL_BUFFER_SIZE 4096

/*
 * Growable, always NUL-terminated string buffer used to build the HTML output.
 *
 * While healthy: data != NULL, capacity >= length + 1, data[length] == '\0'.
 *
 * If the buffer can no longer grow (allocation failure or size overflow) it
 * is "poisoned": data is freed and set to NULL, length and capacity become 0,
 * and every later append is a no-op. Code that hands out buf->data (as
 * markdown_to_html does) therefore yields NULL after a failed append instead
 * of writing past the end of the allocation.
 */
typedef struct {
    char *data;
    size_t length;
    size_t capacity;
} StringBuffer;

/*
 * Allocates an empty buffer with room for initial_capacity bytes (including
 * the terminator). A request of 0 is rounded up to 1 so the terminator fits.
 * Returns NULL if either allocation fails.
 */
static StringBuffer *buffer_create(size_t initial_capacity)
{
    if (initial_capacity == 0)
        initial_capacity = 1;

    StringBuffer *buf = malloc(sizeof *buf);
    if (!buf)
        return NULL;

    buf->data = malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return NULL;
    }
    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

/* Releases the storage and marks the buffer as permanently failed. */
static void buffer_poison(StringBuffer *buf)
{
    free(buf->data);
    buf->data = NULL;
    buf->length = 0;
    buf->capacity = 0;
}

/*
 * Ensures there is room for `needed` more bytes plus the terminator.
 * Capacity grows by doubling. Returns 1 when the space is available and 0
 * when the buffer is (or has just become) poisoned.
 */
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    if (!buf->data)
        return 0; /* already poisoned */

    /* length + needed + 1 must not wrap around. */
    if (needed >= size_max - buf->length) {
        buffer_poison(buf);
        return 0;
    }

    const size_t required = buf->length + needed + 1;
    if (required <= buf->capacity)
        return 1;

    /* Start from at least 1 so doubling always makes progress. */
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            /* Doubling would overflow; ask for exactly what is needed. */
            new_capacity = required;
            break;
        }
        new_capacity *= 2;
    }

    /* Keep the old pointer until realloc is known to have succeeded. */
    char *new_data = realloc(buf->data, new_capacity);
    if (!new_data) {
        buffer_poison(buf);
        return 0;
    }
    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

/* Appends exactly n bytes from str and re-terminates the buffer. */
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n))
        return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

/* Appends the NUL-terminated string str. */
static void buffer_append(StringBuffer *buf, const char *str)
{
    buffer_append_n(buf, str, strlen(str));
}

/* Appends the single byte c. */
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1))
        return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

/* Frees the buffer and its storage; accepts NULL. */
static void buffer_free(StringBuffer *buf)
{
    if (buf) {
        free(buf->data);
        free(buf);
    }
}
| 82 | |
/*
 * Reports whether `line`, ignoring any leading whitespace, begins with
 * `pattern`. Returns non-zero on a match and 0 otherwise.
 */
static int starts_with(const char *line, const char *pattern)
{
    const size_t pattern_len = strlen(pattern);
    const char *cursor = line;

    for (; *cursor != '\0'; cursor++) {
        if (!isspace((unsigned char)*cursor))
            break;
    }
    return strncmp(cursor, pattern, pattern_len) == 0;
}
| 89 | |
/*
 * Returns the heading level (1-6) of `line`: the number of '#' characters
 * that follow optional leading whitespace, provided the run is immediately
 * followed by a space. Returns 0 when the line is not a heading, which
 * includes runs of more than six '#'.
 */
static int count_heading_level(const char *line)
{
    const char *p = line;
    int hashes;

    while (isspace((unsigned char)*p))
        p++;

    for (hashes = 0; hashes < 6; hashes++) {
        if (p[hashes] != '#')
            break;
    }

    if (hashes == 0)
        return 0;
    return (p[hashes] == ' ') ? hashes : 0;
}
| 99 | |
/*
 * Returns a pointer to the first non-whitespace character of `str`, or to
 * its terminating NUL when the string is all whitespace.
 */
static const char *skip_whitespace(const char *str)
{
    const char *p = str;

    for (;;) {
        const unsigned char ch = (unsigned char)*p;
        if (ch == '\0' || !isspace(ch))
            return p;
        p++;
    }
}
| 106 | |
/*
 * Returns 1 when `line` is empty or consists solely of whitespace,
 * 0 otherwise.
 */
static int is_empty_line(const char *line)
{
    const char *p = line;

    /* Run past the leading whitespace; a blank line ends exactly there. */
    while (isspace((unsigned char)*p))
        p++;

    return *p == '\0';
}
| 116 | |
/*
 * Returns non-zero when `line` is a horizontal rule: after optional leading
 * whitespace it contains at least three copies of one marker ('-', '*' or
 * '_') and nothing else except whitespace.
 */
static int is_horizontal_rule(const char *line)
{
    const char *p = line;

    while (isspace((unsigned char)*p))
        p++;

    const char marker = *p;
    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            markers++;
            continue;
        }
        if (!isspace((unsigned char)*p))
            return 0;
    }
    return markers >= 3;
}
| 132 | |
/*
 * Returns non-zero when `line`, after optional leading whitespace, starts
 * with a bullet ('-', '*' or '+') immediately followed by a space.
 */
static int is_unordered_list(const char *line)
{
    const char *p = line;

    while (isspace((unsigned char)*p))
        p++;

    switch (*p) {
    case '-':
    case '*':
    case '+':
        return p[1] == ' ';
    default:
        return 0;
    }
}
| 139 | |
/*
 * Returns non-zero when `line` is an ordered list item: optional leading
 * whitespace, one or more decimal digits, a '.', then a space.
 *
 * At least one digit is required, so a line such as ". text" is not
 * treated as a list item.
 */
static int is_ordered_list(const char *line)
{
    const char *p = line;

    while (isspace((unsigned char)*p))
        p++;

    const char *digits = p;
    while (isdigit((unsigned char)*p))
        p++;

    if (p == digits)
        return 0; /* no number before the '.' */

    return p[0] == '.' && p[1] == ' ';
}
| 147 | |
| 148 // Process inline markdown (bold, italic, code, links, strikethrough) | |
| 149 static void process_inline(StringBuffer *buf, const char *text, size_t len) | |
| 150 { | |
| 151 size_t i = 0; | |
| 152 | |
| 153 while (i < len) { | |
| 154 // Links: [text](url) | |
| 155 if (text[i] == '[') { | |
| 156 size_t link_start = i + 1; | |
| 157 size_t link_end = link_start; | |
| 158 while (link_end < len && text[link_end] != ']') link_end++; | |
| 159 | |
| 160 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') { | |
| 161 size_t url_start = link_end + 2; | |
| 162 size_t url_end = url_start; | |
| 163 while (url_end < len && text[url_end] != ')') url_end++; | |
| 164 | |
| 165 if (url_end < len) { | |
| 166 buffer_append(buf, "<a href=\""); | |
| 167 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 168 buffer_append(buf, "\">"); | |
| 169 buffer_append_n(buf, text + link_start, link_end - link_start); | |
| 170 buffer_append(buf, "</a>"); | |
| 171 i = url_end + 1; | |
| 172 continue; | |
| 173 } | |
| 174 } | |
| 175 } | |
| 176 | |
| 177 // Images:  | |
| 178 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') { | |
| 179 size_t alt_start = i + 2; | |
| 180 size_t alt_end = alt_start; | |
| 181 while (alt_end < len && text[alt_end] != ']') alt_end++; | |
| 182 | |
| 183 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') { | |
| 184 size_t url_start = alt_end + 2; | |
| 185 size_t url_end = url_start; | |
| 186 while (url_end < len && text[url_end] != ')') url_end++; | |
| 187 | |
| 188 if (url_end < len) { | |
| 189 buffer_append(buf, "<img src=\""); | |
| 190 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 191 buffer_append(buf, "\" alt=\""); | |
| 192 buffer_append_n(buf, text + alt_start, alt_end - alt_start); | |
| 193 buffer_append(buf, "\">"); | |
| 194 i = url_end + 1; | |
| 195 continue; | |
| 196 } | |
| 197 } | |
| 198 } | |
| 199 | |
| 200 // Bold: **text** or __text__ | |
| 201 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') || | |
| 202 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) { | |
| 203 char marker = text[i]; | |
| 204 size_t start = i + 2; | |
| 205 size_t end = start; | |
| 206 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++; | |
| 207 | |
| 208 if (end + 1 < len) { | |
| 209 buffer_append(buf, "<strong>"); | |
| 210 process_inline(buf, text + start, end - start); | |
| 211 buffer_append(buf, "</strong>"); | |
| 212 i = end + 2; | |
| 213 continue; | |
| 214 } | |
| 215 } | |
| 216 | |
| 217 // Strikethrough: ~~text~~ | |
| 218 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') { | |
| 219 size_t start = i + 2; | |
| 220 size_t end = start; | |
| 221 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++; | |
| 222 | |
| 223 if (end + 1 < len) { | |
| 224 buffer_append(buf, "<del>"); | |
| 225 process_inline(buf, text + start, end - start); | |
| 226 buffer_append(buf, "</del>"); | |
| 227 i = end + 2; | |
| 228 continue; | |
| 229 } | |
| 230 } | |
| 231 | |
| 232 // Italic: *text* or _text_ | |
| 233 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) { | |
| 234 char marker = text[i]; | |
| 235 size_t start = i + 1; | |
| 236 size_t end = start; | |
| 237 while (end < len && text[end] != marker) end++; | |
| 238 | |
| 239 if (end < len && end > start) { | |
| 240 buffer_append(buf, "<em>"); | |
| 241 process_inline(buf, text + start, end - start); | |
| 242 buffer_append(buf, "</em>"); | |
| 243 i = end + 1; | |
| 244 continue; | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 // Inline code: `code` | |
| 249 if (text[i] == '`') { | |
| 250 size_t start = i + 1; | |
| 251 size_t end = start; | |
| 252 while (end < len && text[end] != '`') end++; | |
| 253 | |
| 254 if (end < len) { | |
| 255 buffer_append(buf, "<code>"); | |
| 256 buffer_append_n(buf, text + start, end - start); | |
| 257 buffer_append(buf, "</code>"); | |
| 258 i = end + 1; | |
| 259 continue; | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 // HTML escape special characters | |
| 264 if (text[i] == '<') { | |
| 265 buffer_append(buf, "<"); | |
| 266 } else if (text[i] == '>') { | |
| 267 buffer_append(buf, ">"); | |
| 268 } else if (text[i] == '&') { | |
| 269 buffer_append(buf, "&"); | |
| 270 } else { | |
| 271 buffer_append_char(buf, text[i]); | |
| 272 } | |
| 273 i++; | |
| 274 } | |
| 275 } | |
| 276 | |
| 277 // Convert markdown to HTML | |
| 278 MDAPI char *markdown_to_html(const char *markdown) | |
| 279 { | |
| 280 if (!markdown) return NULL; | |
| 281 | |
| 282 StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE); | |
| 283 if (!buf) return NULL; | |
| 284 | |
| 285 const char *ptr = markdown; | |
| 286 const char *line_start; | |
| 287 | |
| 288 while (*ptr) { | |
| 289 line_start = ptr; | |
| 290 | |
| 291 // Find end of line | |
| 292 while (*ptr && *ptr != '\n') ptr++; | |
| 293 size_t line_len = ptr - line_start; | |
| 294 | |
| 295 // Create null-terminated line copy | |
| 296 char *line = (char *)malloc(line_len + 1); | |
| 297 if (!line) { | |
| 298 buffer_free(buf); | |
| 299 return NULL; | |
| 300 } | |
| 301 memcpy(line, line_start, line_len); | |
| 302 line[line_len] = '\0'; | |
| 303 | |
| 304 // Skip empty lines | |
| 305 if (is_empty_line(line)) { | |
| 306 free(line); | |
| 307 if (*ptr == '\n') ptr++; | |
| 308 continue; | |
| 309 } | |
| 310 | |
| 311 // Headings: # H1, ## H2, etc. | |
| 312 int heading_level = count_heading_level(line); | |
| 313 if (heading_level > 0) { | |
| 314 const char *content = skip_whitespace(line); | |
| 315 while (*content == '#') content++; | |
| 316 content = skip_whitespace(content); | |
| 317 | |
| 318 char tag[8]; | |
| 319 snprintf(tag, sizeof(tag), "<h%d>", heading_level); | |
| 320 buffer_append(buf, tag); | |
| 321 process_inline(buf, content, strlen(content)); | |
| 322 snprintf(tag, sizeof(tag), "</h%d>", heading_level); | |
| 323 buffer_append(buf, tag); | |
| 324 | |
| 325 free(line); | |
| 326 if (*ptr == '\n') ptr++; | |
| 327 continue; | |
| 328 } | |
| 329 | |
| 330 // Code block: ``` | |
| 331 if (starts_with(line, "```")) { | |
| 332 buffer_append(buf, "<pre><code>"); | |
| 333 free(line); | |
| 334 if (*ptr == '\n') ptr++; | |
| 335 | |
| 336 // Collect code content | |
| 337 while (*ptr) { | |
| 338 line_start = ptr; | |
| 339 while (*ptr && *ptr != '\n') ptr++; | |
| 340 line_len = ptr - line_start; | |
| 341 | |
| 342 line = (char *)malloc(line_len + 1); | |
| 343 if (!line) break; | |
| 344 memcpy(line, line_start, line_len); | |
| 345 line[line_len] = '\0'; | |
| 346 | |
| 347 if (starts_with(line, "```")) { | |
| 348 free(line); | |
| 349 if (*ptr == '\n') ptr++; | |
| 350 break; | |
| 351 } | |
| 352 | |
| 353 // Escape HTML in code blocks | |
| 354 for (size_t i = 0; i < line_len; i++) { | |
| 355 if (line[i] == '<') buffer_append(buf, "<"); | |
| 356 else if (line[i] == '>') buffer_append(buf, ">"); | |
| 357 else if (line[i] == '&') buffer_append(buf, "&"); | |
| 358 else buffer_append_char(buf, line[i]); | |
| 359 } | |
| 360 buffer_append_char(buf, '\n'); | |
| 361 | |
| 362 free(line); | |
| 363 if (*ptr == '\n') ptr++; | |
| 364 } | |
| 365 | |
| 366 buffer_append(buf, "</code></pre>"); | |
| 367 continue; | |
| 368 } | |
| 369 | |
| 370 // Blockquote: > | |
| 371 if (starts_with(line, ">")) { | |
| 372 buffer_append(buf, "<blockquote>"); | |
| 373 | |
| 374 while (1) { | |
| 375 const char *content = skip_whitespace(line); | |
| 376 if (*content == '>') content++; | |
| 377 content = skip_whitespace(content); | |
| 378 process_inline(buf, content, strlen(content)); | |
| 379 buffer_append_char(buf, ' '); | |
| 380 | |
| 381 free(line); | |
| 382 if (*ptr == '\n') ptr++; | |
| 383 | |
| 384 // Check next line | |
| 385 if (!*ptr) break; | |
| 386 line_start = ptr; | |
| 387 while (*ptr && *ptr != '\n') ptr++; | |
| 388 line_len = ptr - line_start; | |
| 389 | |
| 390 line = (char *)malloc(line_len + 1); | |
| 391 if (!line) break; | |
| 392 memcpy(line, line_start, line_len); | |
| 393 line[line_len] = '\0'; | |
| 394 | |
| 395 if (!starts_with(line, ">")) { | |
| 396 // Put back the line pointer | |
| 397 ptr = line_start; | |
| 398 free(line); | |
| 399 break; | |
| 400 } | |
| 401 } | |
| 402 | |
| 403 buffer_append(buf, "</blockquote>"); | |
| 404 continue; | |
| 405 } | |
| 406 | |
| 407 // Horizontal rule | |
| 408 if (is_horizontal_rule(line)) { | |
| 409 buffer_append(buf, "<hr>"); | |
| 410 free(line); | |
| 411 if (*ptr == '\n') ptr++; | |
| 412 continue; | |
| 413 } | |
| 414 | |
| 415 // Unordered list | |
| 416 if (is_unordered_list(line)) { | |
| 417 buffer_append(buf, "<ul>"); | |
| 418 | |
| 419 while (1) { | |
| 420 const char *content = skip_whitespace(line); | |
| 421 content += 2; // Skip "- " or "* " or "+ " | |
| 422 | |
| 423 buffer_append(buf, "<li>"); | |
| 424 process_inline(buf, content, strlen(content)); | |
| 425 buffer_append(buf, "</li>"); | |
| 426 | |
| 427 free(line); | |
| 428 if (*ptr == '\n') ptr++; | |
| 429 | |
| 430 // Check next line | |
| 431 if (!*ptr) break; | |
| 432 line_start = ptr; | |
| 433 while (*ptr && *ptr != '\n') ptr++; | |
| 434 line_len = ptr - line_start; | |
| 435 | |
| 436 line = (char *)malloc(line_len + 1); | |
| 437 if (!line) break; | |
| 438 memcpy(line, line_start, line_len); | |
| 439 line[line_len] = '\0'; | |
| 440 | |
| 441 if (!is_unordered_list(line)) { | |
| 442 ptr = line_start; | |
| 443 free(line); | |
| 444 break; | |
| 445 } | |
| 446 } | |
| 447 | |
| 448 buffer_append(buf, "</ul>"); | |
| 449 continue; | |
| 450 } | |
| 451 | |
| 452 // Ordered list | |
| 453 if (is_ordered_list(line)) { | |
| 454 buffer_append(buf, "<ol>"); | |
| 455 | |
| 456 while (1) { | |
| 457 const char *content = skip_whitespace(line); | |
| 458 while (*content && isdigit((unsigned char)*content)) content++; | |
| 459 if (*content == '.') content++; | |
| 460 content = skip_whitespace(content); | |
| 461 | |
| 462 buffer_append(buf, "<li>"); | |
| 463 process_inline(buf, content, strlen(content)); | |
| 464 buffer_append(buf, "</li>"); | |
| 465 | |
| 466 free(line); | |
| 467 if (*ptr == '\n') ptr++; | |
| 468 | |
| 469 // Check next line | |
| 470 if (!*ptr) break; | |
| 471 line_start = ptr; | |
| 472 while (*ptr && *ptr != '\n') ptr++; | |
| 473 line_len = ptr - line_start; | |
| 474 | |
| 475 line = (char *)malloc(line_len + 1); | |
| 476 if (!line) break; | |
| 477 memcpy(line, line_start, line_len); | |
| 478 line[line_len] = '\0'; | |
| 479 | |
| 480 if (!is_ordered_list(line)) { | |
| 481 ptr = line_start; | |
| 482 free(line); | |
| 483 break; | |
| 484 } | |
| 485 } | |
| 486 | |
| 487 buffer_append(buf, "</ol>"); | |
| 488 continue; | |
| 489 } | |
| 490 | |
| 491 // Regular paragraph | |
| 492 buffer_append(buf, "<p>"); | |
| 493 | |
| 494 while (1) { | |
| 495 const char *content = skip_whitespace(line); | |
| 496 process_inline(buf, content, strlen(content)); | |
| 497 | |
| 498 free(line); | |
| 499 if (*ptr == '\n') ptr++; | |
| 500 | |
| 501 // Check next line - continue paragraph if not special | |
| 502 if (!*ptr) break; | |
| 503 line_start = ptr; | |
| 504 while (*ptr && *ptr != '\n') ptr++; | |
| 505 line_len = ptr - line_start; | |
| 506 | |
| 507 line = (char *)malloc(line_len + 1); | |
| 508 if (!line) break; | |
| 509 memcpy(line, line_start, line_len); | |
| 510 line[line_len] = '\0'; | |
| 511 | |
| 512 if (is_empty_line(line) || | |
| 513 count_heading_level(line) > 0 || | |
| 514 starts_with(line, "```") || | |
| 515 starts_with(line, ">") || | |
| 516 is_horizontal_rule(line) || | |
| 517 is_unordered_list(line) || | |
| 518 is_ordered_list(line)) { | |
| 519 ptr = line_start; | |
| 520 free(line); | |
| 521 break; | |
| 522 } | |
| 523 | |
| 524 buffer_append_char(buf, ' '); | |
| 525 } | |
| 526 | |
| 527 buffer_append(buf, "</p>"); | |
| 528 } | |
| 529 | |
| 530 char *result = buf->data; | |
| 531 free(buf); // Free struct but not data | |
| 532 return result; | |
| 533 } | |
| 534 | |
| 535 // Free the returned HTML string | |
| 536 MDAPI void markdown_free(char *html) | |
| 537 { | |
| 538 free(html); | |
| 539 } | |
| 540 | |
| 541 // Get length of HTML string (for WASM memory allocation) | |
| 542 MDAPI size_t markdown_get_length(const char *html) | |
| 543 { | |
| 544 return html ? strlen(html) : 0; | |
| 545 } |