comparison markdown_converter/markdown_to_html.c @ 190:a2725419f988 hg-web

Updated so that bun builds will work with already existing js files.
author MrJuneJune <me@mrjunejune.com>
date Sat, 24 Jan 2026 21:06:42 -0800
parents 8c74204fd362
children
comparison
equal deleted inserted replaced
188:32ce881452fa 190:a2725419f988
82 free(buf->data); 82 free(buf->data);
83 free(buf); 83 free(buf);
84 } 84 }
85 } 85 }
86 86
87 // Forward declaration
88 static void process_inline(StringBuffer *buf, const char *text, size_t len);
89
87 // Check if line starts with pattern (after trimming whitespace) 90 // Check if line starts with pattern (after trimming whitespace)
88 static int starts_with(const char *line, const char *pattern) 91 static int starts_with(const char *line, const char *pattern)
89 { 92 {
90 while (*line && isspace((unsigned char)*line)) line++; 93 while (*line && isspace((unsigned char)*line)) line++;
91 return strncmp(line, pattern, strlen(pattern)) == 0; 94 return strncmp(line, pattern, strlen(pattern)) == 0;
// Check if line is an ordered list item.
// After skip_whitespace(), the line must consist of one or more decimal
// digits followed by ". " (e.g. "1. item", "42. item").
// Returns non-zero on match, 0 otherwise.
static int is_ordered_list(const char *line)
{
    line = skip_whitespace(line);

    // Require at least one digit; otherwise a line that merely starts
    // with ". " would be classified as a list item.
    const char *digits_start = line;
    while (isdigit((unsigned char)*line)) {
        line++;
    }
    if (line == digits_start) {
        return 0;
    }

    return line[0] == '.' && line[1] == ' ';
}
192
// Report whether a line is a candidate table row.
// The only requirement is that a '|' appears somewhere at or after the
// position returned by skip_whitespace(). Returns 1 if so, 0 otherwise.
static int is_table_row(const char *line)
{
    const char *p = skip_whitespace(line);

    for (; *p != '\0'; p++) {
        if (*p == '|') {
            return 1;
        }
    }
    return 0;
}
200
201 // Check if line is a table separator (|---|---|)
202 static int is_table_separator(const char *line)
203 {
204 line = skip_whitespace(line);
205 int has_dash = 0;
206 int has_pipe = 0;
207
208 while (*line) {
209 char c = *line;
210 if (c == '|') has_pipe = 1;
211 else if (c == '-') has_dash = 1;
212 else if (c == ':') ; // alignment marker, allowed
213 else if (isspace((unsigned char)c)) ; // whitespace allowed
214 else return 0; // invalid character for separator
215 line++;
216 }
217
218 return has_dash && has_pipe;
219 }
220
// Parse the alignment of one separator cell (e.g. ":---:", "---:", ":---").
//
// cell: start of the cell text (need not be NUL-terminated)
// len:  number of bytes in the cell
//
// Surrounding whitespace is ignored.
// Returns: 0 = left (default), 1 = center, 2 = right
static int parse_alignment(const char *cell, size_t len)
{
    // Trim whitespace on both sides
    while (len > 0 && isspace((unsigned char)cell[0])) {
        cell++;
        len--;
    }
    while (len > 0 && isspace((unsigned char)cell[len - 1])) {
        len--;
    }

    if (len == 0) {
        return 0;
    }

    int left_colon = (cell[0] == ':');
    // The trailing colon must be a different character from the leading
    // one; a lone ":" must not count as both and be reported as centered.
    int right_colon = (len > 1 && cell[len - 1] == ':');

    if (left_colon && right_colon) {
        return 1; // center
    }
    if (right_colon) {
        return 2; // right
    }
    return 0; // left (default)
}
238
// Count the columns of a table row.
// One optional leading '|' is ignored; every later '|' closes a column.
// Non-whitespace text after the final '|' counts as one more column.
// The result is never less than 1.
static int count_table_columns(const char *line)
{
    const char *p = skip_whitespace(line);
    int columns = 0;
    int pending = 0; // non-blank text seen since the last '|'

    // Skip leading |
    if (*p == '|') {
        p++;
    }

    for (; *p != '\0'; p++) {
        if (*p == '|') {
            columns++;
            pending = 0;
        } else if (!isspace((unsigned char)*p)) {
            pending = 1;
        }
    }

    // A row without a trailing '|' still ends with a cell.
    if (pending) {
        columns++;
    }

    if (columns > 0) {
        return columns;
    }
    return 1;
}
264
265 // Parse table cells and call callback for each
266 typedef void (*cell_callback)(StringBuffer *buf, const char *cell, size_t len, int align, int is_header);
267
268 static void parse_table_row(StringBuffer *buf, const char *line, int *alignments, int num_cols, int is_header, cell_callback cb)
269 {
270 line = skip_whitespace(line);
271
272 // Skip leading |
273 if (*line == '|') line++;
274
275 int col = 0;
276 const char *cell_start = line;
277
278 while (*line && col < num_cols) {
279 if (*line == '|' || *(line + 1) == '\0') {
280 // End of cell
281 size_t cell_len = line - cell_start;
282 if (*line != '|') cell_len++; // include last char if no trailing |
283
284 // Trim whitespace from cell
285 while (cell_len > 0 && isspace((unsigned char)*cell_start)) { cell_start++; cell_len--; }
286 while (cell_len > 0 && isspace((unsigned char)cell_start[cell_len-1])) { cell_len--; }
287
288 int align = (alignments && col < num_cols) ? alignments[col] : 0;
289 cb(buf, cell_start, cell_len, align, is_header);
290
291 col++;
292 cell_start = line + 1;
293 }
294 line++;
295 }
296
297 // Fill remaining columns with empty cells
298 while (col < num_cols) {
299 cb(buf, "", 0, alignments ? alignments[col] : 0, is_header);
300 col++;
301 }
302 }
303
304 static void emit_table_cell(StringBuffer *buf, const char *cell, size_t len, int align, int is_header)
305 {
306 const char *tag = is_header ? "th" : "td";
307 const char *align_attr = "";
308
309 if (align == 1) align_attr = " style=\"text-align:center\"";
310 else if (align == 2) align_attr = " style=\"text-align:right\"";
311
312 buffer_append(buf, "<");
313 buffer_append(buf, tag);
314 buffer_append(buf, align_attr);
315 buffer_append(buf, ">");
316 process_inline(buf, cell, len);
317 buffer_append(buf, "</");
318 buffer_append(buf, tag);
319 buffer_append(buf, ">");
320 }
321
// Parse per-column alignments from a table separator row.
//
// line:       NUL-terminated separator row (e.g. "|:---|:--:|---:|")
// alignments: output array with room for num_cols entries; may be NULL,
//             in which case nothing is written
// num_cols:   maximum number of cells to parse
//
// Each cell found is classified with parse_alignment() and stored at its
// column index. Entries for columns the separator row does not contain are
// left untouched, so the caller should pre-initialize the array.
static void parse_alignments(const char *line, int *alignments, int num_cols)
{
    // The array typically comes from an allocation that can fail; writing
    // through NULL would crash, so treat it as "no alignment info".
    if (alignments == NULL) {
        return;
    }

    line = skip_whitespace(line);
    if (*line == '|') {
        line++;
    }

    int col = 0;
    const char *cell_start = line;

    while (*line && col < num_cols) {
        if (*line == '|' || *(line + 1) == '\0') {
            size_t cell_len = (size_t)(line - cell_start);
            // Without a trailing '|', the last character is part of the cell.
            if (*line != '|') {
                cell_len++;
            }

            alignments[col] = parse_alignment(cell_start, cell_len);
            col++;
            cell_start = line + 1;
        }
        line++;
    }
}
189 343
190 // Process inline markdown (bold, italic, code, links, strikethrough) 344 // Process inline markdown (bold, italic, code, links, strikethrough)
191 static void process_inline(StringBuffer *buf, const char *text, size_t len) 345 static void process_inline(StringBuffer *buf, const char *text, size_t len)
192 { 346 {
530 684
531 buffer_append(buf, "</ol>"); 685 buffer_append(buf, "</ol>");
532 continue; 686 continue;
533 } 687 }
534 688
689 // Table: | col1 | col2 | followed by |---|---|
690 if (is_table_row(line)) {
691 // Peek at next line to see if it's a separator
692 const char *peek_ptr = ptr;
693 if (*peek_ptr == '\n') peek_ptr++;
694
695 const char *next_line_start = peek_ptr;
696 while (*peek_ptr && *peek_ptr != '\n') peek_ptr++;
697 size_t next_line_len = peek_ptr - next_line_start;
698
699 char *next_line = (char *)malloc(next_line_len + 1);
700 if (next_line) {
701 memcpy(next_line, next_line_start, next_line_len);
702 next_line[next_line_len] = '\0';
703
704 if (is_table_separator(next_line)) {
705 // It's a table!
706 int num_cols = count_table_columns(line);
707 int *alignments = (int *)calloc(num_cols, sizeof(int));
708
709 buffer_append(buf, "<table>");
710
711 // Header row
712 buffer_append(buf, "<thead><tr>");
713 parse_table_row(buf, line, NULL, num_cols, 1, emit_table_cell);
714 buffer_append(buf, "</tr></thead>");
715
716 free(line);
717 if (*ptr == '\n') ptr++;
718
719 // Parse alignments from separator
720 parse_alignments(next_line, alignments, num_cols);
721 free(next_line);
722
723 // Skip separator line
724 ptr = peek_ptr;
725 if (*ptr == '\n') ptr++;
726
727 // Body rows
728 buffer_append(buf, "<tbody>");
729
730 while (*ptr) {
731 line_start = ptr;
732 while (*ptr && *ptr != '\n') ptr++;
733 line_len = ptr - line_start;
734
735 line = (char *)malloc(line_len + 1);
736 if (!line) break;
737 memcpy(line, line_start, line_len);
738 line[line_len] = '\0';
739
740 if (!is_table_row(line) || is_empty_line(line)) {
741 ptr = line_start;
742 free(line);
743 break;
744 }
745
746 buffer_append(buf, "<tr>");
747 parse_table_row(buf, line, alignments, num_cols, 0, emit_table_cell);
748 buffer_append(buf, "</tr>");
749
750 free(line);
751 if (*ptr == '\n') ptr++;
752 }
753
754 buffer_append(buf, "</tbody></table>");
755 free(alignments);
756 continue;
757 }
758 free(next_line);
759 }
760 }
761
535 // HTML block - pass through unchanged 762 // HTML block - pass through unchanged
536 if (is_html_block_start(line)) { 763 if (is_html_block_start(line)) {
537 // Check if it's a script or style tag that needs special handling 764 // Check if it's a script or style tag that needs special handling
538 int is_script = is_html_tag(line, "script"); 765 int is_script = is_html_tag(line, "script");
539 int is_style = is_html_tag(line, "style"); 766 int is_style = is_html_tag(line, "style");
605 starts_with(line, "```") || 832 starts_with(line, "```") ||
606 starts_with(line, ">") || 833 starts_with(line, ">") ||
607 is_horizontal_rule(line) || 834 is_horizontal_rule(line) ||
608 is_unordered_list(line) || 835 is_unordered_list(line) ||
609 is_ordered_list(line) || 836 is_ordered_list(line) ||
837 is_table_row(line) ||
610 is_html_block_start(line)) { 838 is_html_block_start(line)) {
611 ptr = line_start; 839 ptr = line_start;
612 free(line); 840 free(line);
613 break; 841 break;
614 } 842 }