Mercurial
comparison markdown_converter/markdown_to_html.c @ 173:827c6ac504cd hg-web
Merged in default here.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Mon, 19 Jan 2026 18:59:10 -0800 |
| parents | 1c0878eb17de |
| children | 8c74204fd362 |
comparison
equal
deleted
inserted
replaced
| 151:c033667da5f9 | 173:827c6ac504cd |
|---|---|
| 1 #include <string.h> | |
| 2 #include <stdlib.h> | |
| 3 #include <stdio.h> | |
| 4 #include <ctype.h> | |
| 5 #include "markdown_converter/markdown_to_html.h" | |
| 6 | |
// Initial capacity of the output buffer, in bytes (1 MiB).
// Parenthesized so the macro expands as a single value inside larger
// expressions such as `x / INITIAL_BUFFER_SIZE`.
#define INITIAL_BUFFER_SIZE (1024 * 1024)
| 8 | |
// Growable string buffer used to build the HTML output.
// Invariant maintained by every function below: data[length] == '\0'.
typedef struct {
    char *data;       // heap storage holding the text plus its terminator
    size_t length;    // number of bytes stored, not counting the terminator
    size_t capacity;  // number of bytes allocated for data
} StringBuffer;

// Allocate an empty buffer with room for initial_capacity bytes.
// A request for 0 bytes is raised to 1 so the terminator always fits.
// Returns NULL if either allocation fails.
static StringBuffer *buffer_create(size_t initial_capacity)
{
    if (initial_capacity == 0) initial_capacity = 1;

    StringBuffer *buf = (StringBuffer *)malloc(sizeof(StringBuffer));
    if (!buf) return NULL;

    buf->data = (char *)malloc(initial_capacity);
    if (!buf->data) {
        free(buf);
        return NULL;
    }
    buf->data[0] = '\0';
    buf->length = 0;
    buf->capacity = initial_capacity;
    return buf;
}

// Make sure `needed` more bytes plus the terminator fit in the buffer,
// doubling the capacity until they do.
// Returns 1 when the space is available, 0 when it is not (size overflow or
// realloc failure). On failure the buffer is left exactly as it was, so the
// caller must not write the extra bytes.
static int buffer_grow(StringBuffer *buf, size_t needed)
{
    const size_t size_max = (size_t)-1;

    // length + needed + 1 must not wrap around.
    if (needed >= size_max - buf->length) return 0;

    size_t required = buf->length + needed + 1;
    if (required <= buf->capacity) return 1;

    // Start from at least 1: doubling a capacity of 0 would never terminate.
    size_t new_capacity = buf->capacity ? buf->capacity : 1;
    while (new_capacity < required) {
        if (new_capacity > size_max / 2) {
            new_capacity = required;  // doubling would overflow; take the exact size
            break;
        }
        new_capacity *= 2;
    }

    char *new_data = (char *)realloc(buf->data, new_capacity);
    if (!new_data) return 0;

    buf->data = new_data;
    buf->capacity = new_capacity;
    return 1;
}

// Append a NUL-terminated string. The append is dropped if memory cannot be
// obtained, which truncates the output instead of overflowing the heap.
static void buffer_append(StringBuffer *buf, const char *str)
{
    size_t len = strlen(str);
    if (!buffer_grow(buf, len)) return;
    memcpy(buf->data + buf->length, str, len + 1);  // copies the terminator too
    buf->length += len;
}

// Append exactly n bytes from str (which need not be NUL-terminated).
// The append is dropped if memory cannot be obtained.
static void buffer_append_n(StringBuffer *buf, const char *str, size_t n)
{
    if (!buffer_grow(buf, n)) return;
    memcpy(buf->data + buf->length, str, n);
    buf->length += n;
    buf->data[buf->length] = '\0';
}

// Append a single character. The append is dropped if memory cannot be obtained.
static void buffer_append_char(StringBuffer *buf, char c)
{
    if (!buffer_grow(buf, 1)) return;
    buf->data[buf->length++] = c;
    buf->data[buf->length] = '\0';
}

// Release the buffer and its storage. Passing NULL is allowed.
static void buffer_free(StringBuffer *buf)
{
    if (buf) {
        free(buf->data);
        free(buf);
    }
}
| 77 | |
// Return 1 if `line`, ignoring any leading whitespace, begins with
// `pattern`; otherwise return 0.
static int starts_with(const char *line, const char *pattern)
{
    const char *cursor = line;

    // isspace('\0') is false, so this stops at the end of the string.
    for (; isspace((unsigned char)*cursor); cursor++) {
    }

    size_t pattern_len = strlen(pattern);
    return strncmp(cursor, pattern, pattern_len) == 0;
}
| 84 | |
// Return the heading level (1-6) when `line`, after leading whitespace,
// starts with that many '#' characters followed by a space; 0 otherwise.
static int count_heading_level(const char *line)
{
    const char *p = line;
    for (; isspace((unsigned char)*p); p++) {
    }

    int hashes = 0;
    while (hashes < 6 && p[hashes] == '#') {
        hashes++;
    }

    if (hashes == 0) return 0;

    // The marker only counts when a space separates it from the text,
    // which also rejects runs of seven or more '#'.
    return p[hashes] == ' ' ? hashes : 0;
}
| 94 | |
// Return a pointer to the first non-whitespace character of `str`, or to its
// terminating NUL when the string is all whitespace.
static const char *skip_whitespace(const char *str)
{
    const char *p;
    for (p = str; *p != '\0'; p++) {
        if (!isspace((unsigned char)*p)) break;
    }
    return p;
}
| 101 | |
// Return 1 when `line` is empty or contains nothing but whitespace, else 0.
static int is_empty_line(const char *line)
{
    const char *p = line;

    // Walk past the whitespace; the line is blank iff that reaches the end.
    while (isspace((unsigned char)*p)) {
        p++;
    }
    return *p == '\0';
}
| 111 | |
// Return 1 when `line` is a horizontal rule: three or more of the same
// marker ('-', '*' or '_'), optionally separated by whitespace, and nothing
// else on the line. Return 0 otherwise.
static int is_horizontal_rule(const char *line)
{
    const char *p = line;
    while (isspace((unsigned char)*p)) {
        p++;
    }

    const char marker = *p;
    switch (marker) {
    case '-':
    case '*':
    case '_':
        break;
    default:
        return 0;
    }

    int markers = 0;
    for (; *p != '\0'; p++) {
        if (*p == marker) {
            markers++;
            continue;
        }
        // Anything other than the marker or whitespace disqualifies the line.
        if (!isspace((unsigned char)*p)) return 0;
    }
    return markers >= 3;
}
| 127 | |
// Return 1 when `line`, after leading whitespace, starts with a bullet
// ('-', '*' or '+') immediately followed by a space; 0 otherwise.
static int is_unordered_list(const char *line)
{
    const char *p = line;
    while (isspace((unsigned char)*p)) {
        p++;
    }

    switch (*p) {
    case '-':
    case '*':
    case '+':
        // p[1] is only read once *p is known not to be the terminator.
        return p[1] == ' ';
    default:
        return 0;
    }
}
| 134 | |
// Return 1 when `line`, after leading whitespace, starts with one or more
// decimal digits followed by ". " (for example "1. item"); 0 otherwise.
static int is_ordered_list(const char *line)
{
    const char *p = line;
    while (isspace((unsigned char)*p)) {
        p++;
    }

    const char *digits = p;
    while (isdigit((unsigned char)*p)) {
        p++;
    }

    // At least one digit is required; without this check a line such as
    // ". text" would be treated as a list item.
    if (p == digits) return 0;

    return *p == '.' && p[1] == ' ';
}
| 142 | |
| 143 // Process inline markdown (bold, italic, code, links, strikethrough) | |
| 144 static void process_inline(StringBuffer *buf, const char *text, size_t len) | |
| 145 { | |
| 146 size_t i = 0; | |
| 147 | |
| 148 while (i < len) { | |
| 149 // Links: [text](url) | |
| 150 if (text[i] == '[') { | |
| 151 size_t link_start = i + 1; | |
| 152 size_t link_end = link_start; | |
| 153 while (link_end < len && text[link_end] != ']') link_end++; | |
| 154 | |
| 155 if (link_end < len && link_end + 1 < len && text[link_end + 1] == '(') { | |
| 156 size_t url_start = link_end + 2; | |
| 157 size_t url_end = url_start; | |
| 158 while (url_end < len && text[url_end] != ')') url_end++; | |
| 159 | |
| 160 if (url_end < len) { | |
| 161 buffer_append(buf, "<a href=\""); | |
| 162 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 163 buffer_append(buf, "\">"); | |
| 164 buffer_append_n(buf, text + link_start, link_end - link_start); | |
| 165 buffer_append(buf, "</a>"); | |
| 166 i = url_end + 1; | |
| 167 continue; | |
| 168 } | |
| 169 } | |
| 170 } | |
| 171 | |
| 172 // Images:  | |
| 173 if (text[i] == '!' && i + 1 < len && text[i + 1] == '[') { | |
| 174 size_t alt_start = i + 2; | |
| 175 size_t alt_end = alt_start; | |
| 176 while (alt_end < len && text[alt_end] != ']') alt_end++; | |
| 177 | |
| 178 if (alt_end < len && alt_end + 1 < len && text[alt_end + 1] == '(') { | |
| 179 size_t url_start = alt_end + 2; | |
| 180 size_t url_end = url_start; | |
| 181 while (url_end < len && text[url_end] != ')') url_end++; | |
| 182 | |
| 183 if (url_end < len) { | |
| 184 buffer_append(buf, "<img src=\""); | |
| 185 buffer_append_n(buf, text + url_start, url_end - url_start); | |
| 186 buffer_append(buf, "\" alt=\""); | |
| 187 buffer_append_n(buf, text + alt_start, alt_end - alt_start); | |
| 188 buffer_append(buf, "\">"); | |
| 189 i = url_end + 1; | |
| 190 continue; | |
| 191 } | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 // Bold: **text** or __text__ | |
| 196 if ((text[i] == '*' && i + 1 < len && text[i + 1] == '*') || | |
| 197 (text[i] == '_' && i + 1 < len && text[i + 1] == '_')) { | |
| 198 char marker = text[i]; | |
| 199 size_t start = i + 2; | |
| 200 size_t end = start; | |
| 201 while (end + 1 < len && !(text[end] == marker && text[end + 1] == marker)) end++; | |
| 202 | |
| 203 if (end + 1 < len) { | |
| 204 buffer_append(buf, "<strong>"); | |
| 205 process_inline(buf, text + start, end - start); | |
| 206 buffer_append(buf, "</strong>"); | |
| 207 i = end + 2; | |
| 208 continue; | |
| 209 } | |
| 210 } | |
| 211 | |
| 212 // Strikethrough: ~~text~~ | |
| 213 if (text[i] == '~' && i + 1 < len && text[i + 1] == '~') { | |
| 214 size_t start = i + 2; | |
| 215 size_t end = start; | |
| 216 while (end + 1 < len && !(text[end] == '~' && text[end + 1] == '~')) end++; | |
| 217 | |
| 218 if (end + 1 < len) { | |
| 219 buffer_append(buf, "<del>"); | |
| 220 process_inline(buf, text + start, end - start); | |
| 221 buffer_append(buf, "</del>"); | |
| 222 i = end + 2; | |
| 223 continue; | |
| 224 } | |
| 225 } | |
| 226 | |
| 227 // Italic: *text* or _text_ | |
| 228 if ((text[i] == '*' || text[i] == '_') && i + 1 < len && !isspace((unsigned char)text[i + 1])) { | |
| 229 char marker = text[i]; | |
| 230 size_t start = i + 1; | |
| 231 size_t end = start; | |
| 232 while (end < len && text[end] != marker) end++; | |
| 233 | |
| 234 if (end < len && end > start) { | |
| 235 buffer_append(buf, "<em>"); | |
| 236 process_inline(buf, text + start, end - start); | |
| 237 buffer_append(buf, "</em>"); | |
| 238 i = end + 1; | |
| 239 continue; | |
| 240 } | |
| 241 } | |
| 242 | |
| 243 // Inline code: `code` | |
| 244 if (text[i] == '`') { | |
| 245 size_t start = i + 1; | |
| 246 size_t end = start; | |
| 247 while (end < len && text[end] != '`') end++; | |
| 248 | |
| 249 if (end < len) { | |
| 250 buffer_append(buf, "<code>"); | |
| 251 buffer_append_n(buf, text + start, end - start); | |
| 252 buffer_append(buf, "</code>"); | |
| 253 i = end + 1; | |
| 254 continue; | |
| 255 } | |
| 256 } | |
| 257 | |
| 258 // This might not be needed for now. | |
| 259 // HTML escape special characters | |
| 260 // if (text[i] == '<') { | |
| 261 // buffer_append(buf, "<"); | |
| 262 // } else if (text[i] == '>') { | |
| 263 // buffer_append(buf, ">"); | |
| 264 // } else if (text[i] == '&') { | |
| 265 // buffer_append(buf, "&"); | |
| 266 // } else { | |
| 267 // buffer_append_char(buf, text[i]); | |
| 268 // } | |
| 269 buffer_append_char(buf, text[i]); | |
| 270 i++; | |
| 271 } | |
| 272 } | |
| 273 | |
| 274 // Convert markdown to HTML | |
| 275 MDAPI char *markdown_to_html(const char *markdown) | |
| 276 { | |
| 277 if (!markdown) return NULL; | |
| 278 | |
| 279 StringBuffer *buf = buffer_create(INITIAL_BUFFER_SIZE); | |
| 280 if (!buf) return NULL; | |
| 281 | |
| 282 const char *ptr = markdown; | |
| 283 const char *line_start; | |
| 284 | |
| 285 while (*ptr) { | |
| 286 line_start = ptr; | |
| 287 | |
| 288 // Find end of line | |
| 289 while (*ptr && *ptr != '\n') ptr++; | |
| 290 size_t line_len = ptr - line_start; | |
| 291 | |
| 292 // Create null-terminated line copy | |
| 293 char *line = (char *)malloc(line_len + 1); | |
| 294 if (!line) { | |
| 295 buffer_free(buf); | |
| 296 return NULL; | |
| 297 } | |
| 298 memcpy(line, line_start, line_len); | |
| 299 line[line_len] = '\0'; | |
| 300 | |
| 301 // Skip empty lines | |
| 302 if (is_empty_line(line)) { | |
| 303 free(line); | |
| 304 if (*ptr == '\n') ptr++; | |
| 305 continue; | |
| 306 } | |
| 307 | |
| 308 // Headings: # H1, ## H2, etc. | |
| 309 int heading_level = count_heading_level(line); | |
| 310 if (heading_level > 0) { | |
| 311 const char *content = skip_whitespace(line); | |
| 312 while (*content == '#') content++; | |
| 313 content = skip_whitespace(content); | |
| 314 | |
| 315 char tag[8]; | |
| 316 snprintf(tag, sizeof(tag), "<h%d>", heading_level); | |
| 317 buffer_append(buf, tag); | |
| 318 process_inline(buf, content, strlen(content)); | |
| 319 snprintf(tag, sizeof(tag), "</h%d>", heading_level); | |
| 320 buffer_append(buf, tag); | |
| 321 | |
| 322 free(line); | |
| 323 if (*ptr == '\n') ptr++; | |
| 324 continue; | |
| 325 } | |
| 326 | |
| 327 // Code block: ``` | |
| 328 if (starts_with(line, "```")) { | |
| 329 buffer_append(buf, "<pre><code>"); | |
| 330 free(line); | |
| 331 if (*ptr == '\n') ptr++; | |
| 332 | |
| 333 // Collect code content | |
| 334 while (*ptr) { | |
| 335 line_start = ptr; | |
| 336 while (*ptr && *ptr != '\n') ptr++; | |
| 337 line_len = ptr - line_start; | |
| 338 | |
| 339 line = (char *)malloc(line_len + 1); | |
| 340 if (!line) break; | |
| 341 memcpy(line, line_start, line_len); | |
| 342 line[line_len] = '\0'; | |
| 343 | |
| 344 if (starts_with(line, "```")) { | |
| 345 free(line); | |
| 346 if (*ptr == '\n') ptr++; | |
| 347 break; | |
| 348 } | |
| 349 | |
| 350 // Escape HTML in code blocks | |
| 351 for (size_t i = 0; i < line_len; i++) { | |
| 352 if (line[i] == '<') buffer_append(buf, "<"); | |
| 353 else if (line[i] == '>') buffer_append(buf, ">"); | |
| 354 else if (line[i] == '&') buffer_append(buf, "&"); | |
| 355 else buffer_append_char(buf, line[i]); | |
| 356 } | |
| 357 buffer_append_char(buf, '\n'); | |
| 358 | |
| 359 free(line); | |
| 360 if (*ptr == '\n') ptr++; | |
| 361 } | |
| 362 | |
| 363 buffer_append(buf, "</code></pre>"); | |
| 364 continue; | |
| 365 } | |
| 366 | |
| 367 // Blockquote: > | |
| 368 if (starts_with(line, ">")) { | |
| 369 buffer_append(buf, "<blockquote>"); | |
| 370 | |
| 371 while (1) { | |
| 372 const char *content = skip_whitespace(line); | |
| 373 if (*content == '>') content++; | |
| 374 content = skip_whitespace(content); | |
| 375 process_inline(buf, content, strlen(content)); | |
| 376 buffer_append_char(buf, ' '); | |
| 377 | |
| 378 free(line); | |
| 379 if (*ptr == '\n') ptr++; | |
| 380 | |
| 381 // Check next line | |
| 382 if (!*ptr) break; | |
| 383 line_start = ptr; | |
| 384 while (*ptr && *ptr != '\n') ptr++; | |
| 385 line_len = ptr - line_start; | |
| 386 | |
| 387 line = (char *)malloc(line_len + 1); | |
| 388 if (!line) break; | |
| 389 memcpy(line, line_start, line_len); | |
| 390 line[line_len] = '\0'; | |
| 391 | |
| 392 if (!starts_with(line, ">")) { | |
| 393 // Put back the line pointer | |
| 394 ptr = line_start; | |
| 395 free(line); | |
| 396 break; | |
| 397 } | |
| 398 } | |
| 399 | |
| 400 buffer_append(buf, "</blockquote>"); | |
| 401 continue; | |
| 402 } | |
| 403 | |
| 404 // Horizontal rule | |
| 405 if (is_horizontal_rule(line)) { | |
| 406 buffer_append(buf, "<hr>"); | |
| 407 free(line); | |
| 408 if (*ptr == '\n') ptr++; | |
| 409 continue; | |
| 410 } | |
| 411 | |
| 412 // Unordered list | |
| 413 if (is_unordered_list(line)) { | |
| 414 buffer_append(buf, "<ul>"); | |
| 415 | |
| 416 while (1) { | |
| 417 const char *content = skip_whitespace(line); | |
| 418 content += 2; // Skip "- " or "* " or "+ " | |
| 419 | |
| 420 buffer_append(buf, "<li>"); | |
| 421 process_inline(buf, content, strlen(content)); | |
| 422 buffer_append(buf, "</li>"); | |
| 423 | |
| 424 free(line); | |
| 425 if (*ptr == '\n') ptr++; | |
| 426 | |
| 427 // Check next line | |
| 428 if (!*ptr) break; | |
| 429 line_start = ptr; | |
| 430 while (*ptr && *ptr != '\n') ptr++; | |
| 431 line_len = ptr - line_start; | |
| 432 | |
| 433 line = (char *)malloc(line_len + 1); | |
| 434 if (!line) break; | |
| 435 memcpy(line, line_start, line_len); | |
| 436 line[line_len] = '\0'; | |
| 437 | |
| 438 if (!is_unordered_list(line)) { | |
| 439 ptr = line_start; | |
| 440 free(line); | |
| 441 break; | |
| 442 } | |
| 443 } | |
| 444 | |
| 445 buffer_append(buf, "</ul>"); | |
| 446 continue; | |
| 447 } | |
| 448 | |
| 449 // Ordered list | |
| 450 if (is_ordered_list(line)) { | |
| 451 buffer_append(buf, "<ol>"); | |
| 452 | |
| 453 while (1) { | |
| 454 const char *content = skip_whitespace(line); | |
| 455 while (*content && isdigit((unsigned char)*content)) content++; | |
| 456 if (*content == '.') content++; | |
| 457 content = skip_whitespace(content); | |
| 458 | |
| 459 buffer_append(buf, "<li>"); | |
| 460 process_inline(buf, content, strlen(content)); | |
| 461 buffer_append(buf, "</li>"); | |
| 462 | |
| 463 free(line); | |
| 464 if (*ptr == '\n') ptr++; | |
| 465 | |
| 466 // Check next line | |
| 467 if (!*ptr) break; | |
| 468 line_start = ptr; | |
| 469 while (*ptr && *ptr != '\n') ptr++; | |
| 470 line_len = ptr - line_start; | |
| 471 | |
| 472 line = (char *)malloc(line_len + 1); | |
| 473 if (!line) break; | |
| 474 memcpy(line, line_start, line_len); | |
| 475 line[line_len] = '\0'; | |
| 476 | |
| 477 if (!is_ordered_list(line)) { | |
| 478 ptr = line_start; | |
| 479 free(line); | |
| 480 break; | |
| 481 } | |
| 482 } | |
| 483 | |
| 484 buffer_append(buf, "</ol>"); | |
| 485 continue; | |
| 486 } | |
| 487 | |
| 488 // Regular paragraph | |
| 489 buffer_append(buf, "<p>"); | |
| 490 | |
| 491 while (1) { | |
| 492 const char *content = skip_whitespace(line); | |
| 493 process_inline(buf, content, strlen(content)); | |
| 494 | |
| 495 free(line); | |
| 496 if (*ptr == '\n') ptr++; | |
| 497 | |
| 498 // Check next line - continue paragraph if not special | |
| 499 if (!*ptr) break; | |
| 500 line_start = ptr; | |
| 501 while (*ptr && *ptr != '\n') ptr++; | |
| 502 line_len = ptr - line_start; | |
| 503 | |
| 504 line = (char *)malloc(line_len + 1); | |
| 505 if (!line) break; | |
| 506 memcpy(line, line_start, line_len); | |
| 507 line[line_len] = '\0'; | |
| 508 | |
| 509 if (is_empty_line(line) || | |
| 510 count_heading_level(line) > 0 || | |
| 511 starts_with(line, "```") || | |
| 512 starts_with(line, ">") || | |
| 513 is_horizontal_rule(line) || | |
| 514 is_unordered_list(line) || | |
| 515 is_ordered_list(line)) { | |
| 516 ptr = line_start; | |
| 517 free(line); | |
| 518 break; | |
| 519 } | |
| 520 | |
| 521 buffer_append_char(buf, ' '); | |
| 522 } | |
| 523 | |
| 524 buffer_append(buf, "</p>"); | |
| 525 } | |
| 526 | |
| 527 char *result = buf->data; | |
| 528 free(buf); // Free struct but not data | |
| 529 return result; | |
| 530 } | |
| 531 | |
| 532 // Free the returned HTML string | |
| 533 MDAPI void markdown_free(char *html) | |
| 534 { | |
| 535 free(html); | |
| 536 } | |
| 537 | |
| 538 // Get length of HTML string (for WASM memory allocation) | |
| 539 MDAPI size_t markdown_get_length(const char *html) | |
| 540 { | |
| 541 return html ? strlen(html) : 0; | |
| 542 } |