Mercurial
comparison markdown_converter/markdown_to_html.c @ 195:f8f5004a920a
Merging back hg-web-tip
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Tue, 27 Jan 2026 06:51:44 -0800 |
| parents | a2725419f988 |
| children |
comparison
equal
deleted
inserted
replaced
| 189:14cc84ba35a0 | 195:f8f5004a920a |
|---|---|
| 82 free(buf->data); | 82 free(buf->data); |
| 83 free(buf); | 83 free(buf); |
| 84 } | 84 } |
| 85 } | 85 } |
| 86 | 86 |
| 87 // Forward declaration | |
| 88 static void process_inline(StringBuffer *buf, const char *text, size_t len); | |
| 89 | |
| 87 // Check if line starts with pattern (after trimming whitespace) | 90 // Check if line starts with pattern (after trimming whitespace) |
| 88 static int starts_with(const char *line, const char *pattern) | 91 static int starts_with(const char *line, const char *pattern) |
| 89 { | 92 { |
| 90 while (*line && isspace((unsigned char)*line)) line++; | 93 while (*line && isspace((unsigned char)*line)) line++; |
| 91 return strncmp(line, pattern, strlen(pattern)) == 0; | 94 return strncmp(line, pattern, strlen(pattern)) == 0; |
| 183 static int is_ordered_list(const char *line) | 186 static int is_ordered_list(const char *line) |
| 184 { | 187 { |
| 185 line = skip_whitespace(line); | 188 line = skip_whitespace(line); |
| 186 while (*line && isdigit((unsigned char)*line)) line++; | 189 while (*line && isdigit((unsigned char)*line)) line++; |
| 187 return *line == '.' && line[1] == ' '; | 190 return *line == '.' && line[1] == ' '; |
| 191 } | |
| 192 | |
| 193 // Check if line could be a table row (contains |) | |
| 194 static int is_table_row(const char *line) | |
| 195 { | |
| 196 line = skip_whitespace(line); | |
| 197 // Must contain at least one | | |
| 198 return strchr(line, '|') != NULL; | |
| 199 } | |
| 200 | |
| 201 // Check if line is a table separator (|---|---|) | |
| 202 static int is_table_separator(const char *line) | |
| 203 { | |
| 204 line = skip_whitespace(line); | |
| 205 int has_dash = 0; | |
| 206 int has_pipe = 0; | |
| 207 | |
| 208 while (*line) { | |
| 209 char c = *line; | |
| 210 if (c == '|') has_pipe = 1; | |
| 211 else if (c == '-') has_dash = 1; | |
| 212 else if (c == ':') ; // alignment marker, allowed | |
| 213 else if (isspace((unsigned char)c)) ; // whitespace allowed | |
| 214 else return 0; // invalid character for separator | |
| 215 line++; | |
| 216 } | |
| 217 | |
| 218 return has_dash && has_pipe; | |
| 219 } | |
| 220 | |
| 221 // Parse alignment from separator cell (e.g., ":---:", "---:", ":---") | |
| 222 // Returns: 0 = left (default), 1 = center, 2 = right | |
| 223 static int parse_alignment(const char *cell, size_t len) | |
| 224 { | |
| 225 // Trim whitespace | |
| 226 while (len > 0 && isspace((unsigned char)*cell)) { cell++; len--; } | |
| 227 while (len > 0 && isspace((unsigned char)cell[len-1])) { len--; } | |
| 228 | |
| 229 if (len == 0) return 0; | |
| 230 | |
| 231 int left_colon = (cell[0] == ':'); | |
| 232 int right_colon = (len > 0 && cell[len-1] == ':'); | |
| 233 | |
| 234 if (left_colon && right_colon) return 1; // center | |
| 235 if (right_colon) return 2; // right | |
| 236 return 0; // left (default) | |
| 237 } | |
| 238 | |
| 239 // Count columns in a table row | |
| 240 static int count_table_columns(const char *line) | |
| 241 { | |
| 242 int count = 0; | |
| 243 int in_cell = 0; | |
| 244 line = skip_whitespace(line); | |
| 245 | |
| 246 // Skip leading | | |
| 247 if (*line == '|') line++; | |
| 248 | |
| 249 while (*line) { | |
| 250 if (*line == '|') { | |
| 251 count++; | |
| 252 in_cell = 0; | |
| 253 } else if (!isspace((unsigned char)*line)) { | |
| 254 in_cell = 1; | |
| 255 } | |
| 256 line++; | |
| 257 } | |
| 258 | |
| 259 // Count last cell if there was content after last | | |
| 260 if (in_cell) count++; | |
| 261 | |
| 262 return count > 0 ? count : 1; | |
| 263 } | |
| 264 | |
| 265 // Parse table cells and call callback for each | |
| 266 typedef void (*cell_callback)(StringBuffer *buf, const char *cell, size_t len, int align, int is_header); | |
| 267 | |
| 268 static void parse_table_row(StringBuffer *buf, const char *line, int *alignments, int num_cols, int is_header, cell_callback cb) | |
| 269 { | |
| 270 line = skip_whitespace(line); | |
| 271 | |
| 272 // Skip leading | | |
| 273 if (*line == '|') line++; | |
| 274 | |
| 275 int col = 0; | |
| 276 const char *cell_start = line; | |
| 277 | |
| 278 while (*line && col < num_cols) { | |
| 279 if (*line == '|' || *(line + 1) == '\0') { | |
| 280 // End of cell | |
| 281 size_t cell_len = line - cell_start; | |
| 282 if (*line != '|') cell_len++; // include last char if no trailing | | |
| 283 | |
| 284 // Trim whitespace from cell | |
| 285 while (cell_len > 0 && isspace((unsigned char)*cell_start)) { cell_start++; cell_len--; } | |
| 286 while (cell_len > 0 && isspace((unsigned char)cell_start[cell_len-1])) { cell_len--; } | |
| 287 | |
| 288 int align = (alignments && col < num_cols) ? alignments[col] : 0; | |
| 289 cb(buf, cell_start, cell_len, align, is_header); | |
| 290 | |
| 291 col++; | |
| 292 cell_start = line + 1; | |
| 293 } | |
| 294 line++; | |
| 295 } | |
| 296 | |
| 297 // Fill remaining columns with empty cells | |
| 298 while (col < num_cols) { | |
| 299 cb(buf, "", 0, alignments ? alignments[col] : 0, is_header); | |
| 300 col++; | |
| 301 } | |
| 302 } | |
| 303 | |
| 304 static void emit_table_cell(StringBuffer *buf, const char *cell, size_t len, int align, int is_header) | |
| 305 { | |
| 306 const char *tag = is_header ? "th" : "td"; | |
| 307 const char *align_attr = ""; | |
| 308 | |
| 309 if (align == 1) align_attr = " style=\"text-align:center\""; | |
| 310 else if (align == 2) align_attr = " style=\"text-align:right\""; | |
| 311 | |
| 312 buffer_append(buf, "<"); | |
| 313 buffer_append(buf, tag); | |
| 314 buffer_append(buf, align_attr); | |
| 315 buffer_append(buf, ">"); | |
| 316 process_inline(buf, cell, len); | |
| 317 buffer_append(buf, "</"); | |
| 318 buffer_append(buf, tag); | |
| 319 buffer_append(buf, ">"); | |
| 320 } | |
| 321 | |
| 322 // Parse alignments from separator row | |
| 323 static void parse_alignments(const char *line, int *alignments, int num_cols) | |
| 324 { | |
| 325 line = skip_whitespace(line); | |
| 326 if (*line == '|') line++; | |
| 327 | |
| 328 int col = 0; | |
| 329 const char *cell_start = line; | |
| 330 | |
| 331 while (*line && col < num_cols) { | |
| 332 if (*line == '|' || *(line + 1) == '\0') { | |
| 333 size_t cell_len = line - cell_start; | |
| 334 if (*line != '|') cell_len++; | |
| 335 | |
| 336 alignments[col] = parse_alignment(cell_start, cell_len); | |
| 337 col++; | |
| 338 cell_start = line + 1; | |
| 339 } | |
| 340 line++; | |
| 341 } | |
| 188 } | 342 } |
| 189 | 343 |
| 190 // Process inline markdown (bold, italic, code, links, strikethrough) | 344 // Process inline markdown (bold, italic, code, links, strikethrough) |
| 191 static void process_inline(StringBuffer *buf, const char *text, size_t len) | 345 static void process_inline(StringBuffer *buf, const char *text, size_t len) |
| 192 { | 346 { |
| 530 | 684 |
| 531 buffer_append(buf, "</ol>"); | 685 buffer_append(buf, "</ol>"); |
| 532 continue; | 686 continue; |
| 533 } | 687 } |
| 534 | 688 |
| 689 // Table: | col1 | col2 | followed by |---|---| | |
| 690 if (is_table_row(line)) { | |
| 691 // Peek at next line to see if it's a separator | |
| 692 const char *peek_ptr = ptr; | |
| 693 if (*peek_ptr == '\n') peek_ptr++; | |
| 694 | |
| 695 const char *next_line_start = peek_ptr; | |
| 696 while (*peek_ptr && *peek_ptr != '\n') peek_ptr++; | |
| 697 size_t next_line_len = peek_ptr - next_line_start; | |
| 698 | |
| 699 char *next_line = (char *)malloc(next_line_len + 1); | |
| 700 if (next_line) { | |
| 701 memcpy(next_line, next_line_start, next_line_len); | |
| 702 next_line[next_line_len] = '\0'; | |
| 703 | |
| 704 if (is_table_separator(next_line)) { | |
| 705 // It's a table! | |
| 706 int num_cols = count_table_columns(line); | |
| 707 int *alignments = (int *)calloc(num_cols, sizeof(int)); | |
| 708 | |
| 709 buffer_append(buf, "<table>"); | |
| 710 | |
| 711 // Header row | |
| 712 buffer_append(buf, "<thead><tr>"); | |
| 713 parse_table_row(buf, line, NULL, num_cols, 1, emit_table_cell); | |
| 714 buffer_append(buf, "</tr></thead>"); | |
| 715 | |
| 716 free(line); | |
| 717 if (*ptr == '\n') ptr++; | |
| 718 | |
| 719 // Parse alignments from separator | |
| 720 parse_alignments(next_line, alignments, num_cols); | |
| 721 free(next_line); | |
| 722 | |
| 723 // Skip separator line | |
| 724 ptr = peek_ptr; | |
| 725 if (*ptr == '\n') ptr++; | |
| 726 | |
| 727 // Body rows | |
| 728 buffer_append(buf, "<tbody>"); | |
| 729 | |
| 730 while (*ptr) { | |
| 731 line_start = ptr; | |
| 732 while (*ptr && *ptr != '\n') ptr++; | |
| 733 line_len = ptr - line_start; | |
| 734 | |
| 735 line = (char *)malloc(line_len + 1); | |
| 736 if (!line) break; | |
| 737 memcpy(line, line_start, line_len); | |
| 738 line[line_len] = '\0'; | |
| 739 | |
| 740 if (!is_table_row(line) || is_empty_line(line)) { | |
| 741 ptr = line_start; | |
| 742 free(line); | |
| 743 break; | |
| 744 } | |
| 745 | |
| 746 buffer_append(buf, "<tr>"); | |
| 747 parse_table_row(buf, line, alignments, num_cols, 0, emit_table_cell); | |
| 748 buffer_append(buf, "</tr>"); | |
| 749 | |
| 750 free(line); | |
| 751 if (*ptr == '\n') ptr++; | |
| 752 } | |
| 753 | |
| 754 buffer_append(buf, "</tbody></table>"); | |
| 755 free(alignments); | |
| 756 continue; | |
| 757 } | |
| 758 free(next_line); | |
| 759 } | |
| 760 } | |
| 761 | |
| 535 // HTML block - pass through unchanged | 762 // HTML block - pass through unchanged |
| 536 if (is_html_block_start(line)) { | 763 if (is_html_block_start(line)) { |
| 537 // Check if it's a script or style tag that needs special handling | 764 // Check if it's a script or style tag that needs special handling |
| 538 int is_script = is_html_tag(line, "script"); | 765 int is_script = is_html_tag(line, "script"); |
| 539 int is_style = is_html_tag(line, "style"); | 766 int is_style = is_html_tag(line, "style"); |
| 605 starts_with(line, "```") || | 832 starts_with(line, "```") || |
| 606 starts_with(line, ">") || | 833 starts_with(line, ">") || |
| 607 is_horizontal_rule(line) || | 834 is_horizontal_rule(line) || |
| 608 is_unordered_list(line) || | 835 is_unordered_list(line) || |
| 609 is_ordered_list(line) || | 836 is_ordered_list(line) || |
| 837 is_table_row(line) || | |
| 610 is_html_block_start(line)) { | 838 is_html_block_start(line)) { |
| 611 ptr = line_start; | 839 ptr = line_start; |
| 612 free(line); | 840 free(line); |
| 613 break; | 841 break; |
| 614 } | 842 } |