]> git.cameronkatri.com Git - mandoc.git/blob - mdocterm.c
mdoclint accepts multiple files
[mandoc.git] / mdocterm.c
1 /* $Id: mdocterm.c,v 1.43 2009/03/15 07:08:53 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <sys/types.h>
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <err.h>
24 #include <getopt.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29
30 #include "mmain.h"
31 #include "term.h"
32
/*
 * Command-line state collected by option().  The flag and argument
 * members mirror nroff-style switches; punt() forwards some of them
 * when the input is handed over to nroff.
 */
struct	nroffopt {
	int		  fl_h;		/* Set to 1 when -h is seen. */
	int		  fl_i;		/* Set to 1 when -i is seen. */
	char		 *arg_m;	/* Argument given to -m. */
	char		 *arg_n;	/* Argument given to -n. */
	char		 *arg_o;	/* Argument given to -o. */
	char		 *arg_r;	/* Argument given to -r. */
	char		 *arg_T;	/* Argument given to -T. */
	struct termp	 *termp;	/* Ephemeral. */
};
43
/*
 * One row of an escape-name lookup table.  Tables built from this type
 * end with a row whose `enc' is NULL.
 */
struct	termseq {
	const char	 *enc;	/* Escape name (text after the backslash). */
	int		  sym;	/* Symbol passed to symbola() on a match. */
};
48
49 dead_pre void punt(struct nroffopt *, char *) dead_post;
50 static int option(void *, int, char *);
51 static int optsopt(struct termp *, char *);
52 static void body(struct termp *,
53 struct termpair *,
54 const struct mdoc_meta *,
55 const struct mdoc_node *);
56 static void header(struct termp *,
57 const struct mdoc_meta *);
58 static void footer(struct termp *,
59 const struct mdoc_meta *);
60
61 static void pword(struct termp *, const char *, size_t);
62 static void pescape(struct termp *, const char *,
63 size_t *, size_t);
64 static void nescape(struct termp *,
65 const char *, size_t);
66 static void chara(struct termp *, char);
67 static void stringa(struct termp *,
68 const char *, size_t);
69 static void symbola(struct termp *, enum tsym);
70 static void sanity(const struct mdoc_node *);
71
72 #ifdef __linux__
73 extern size_t strlcat(char *, const char *, size_t);
74 extern size_t strlcpy(char *, const char *, size_t);
75 #endif
76
77 static struct termseq termenc1[] = {
78 { "\\", TERMSYM_SLASH },
79 { "\'", TERMSYM_RSQUOTE },
80 { "`", TERMSYM_LSQUOTE },
81 { "-", TERMSYM_HYPHEN },
82 { " ", TERMSYM_SPACE },
83 { ".", TERMSYM_PERIOD },
84 { "&", TERMSYM_BREAK },
85 { "e", TERMSYM_SLASH },
86 { "q", TERMSYM_DQUOTE },
87 { "|", TERMSYM_BREAK },
88 { NULL, 0 }
89 };
90
91 static struct termseq termenc2[] = {
92 { "rC", TERMSYM_RBRACE },
93 { "lC", TERMSYM_LBRACE },
94 { "rB", TERMSYM_RBRACK },
95 { "lB", TERMSYM_LBRACK },
96 { "ra", TERMSYM_RANGLE },
97 { "la", TERMSYM_LANGLE },
98 { "Lq", TERMSYM_LDQUOTE },
99 { "lq", TERMSYM_LDQUOTE },
100 { "Rq", TERMSYM_RDQUOTE },
101 { "rq", TERMSYM_RDQUOTE },
102 { "oq", TERMSYM_LSQUOTE },
103 { "aq", TERMSYM_RSQUOTE },
104
105 { "<-", TERMSYM_LARROW },
106 { "->", TERMSYM_RARROW },
107 { "ua", TERMSYM_UARROW },
108 { "da", TERMSYM_DARROW },
109
110 { "bu", TERMSYM_BULLET },
111 { "Ba", TERMSYM_BAR },
112 { "ba", TERMSYM_BAR },
113 { "co", TERMSYM_COPY },
114 { "Am", TERMSYM_AMP },
115
116 { "Le", TERMSYM_LE },
117 { "<=", TERMSYM_LE },
118 { "Ge", TERMSYM_GE },
119 { ">=", TERMSYM_GE },
120 { "==", TERMSYM_EQ },
121 { "Ne", TERMSYM_NEQ },
122 { "!=", TERMSYM_NEQ },
123 { "Pm", TERMSYM_PLUSMINUS },
124 { "+-", TERMSYM_PLUSMINUS },
125 { "If", TERMSYM_INF2 },
126 { "if", TERMSYM_INF },
127 { "Na", TERMSYM_NAN },
128 { "na", TERMSYM_NAN },
129 { "**", TERMSYM_ASTERISK },
130 { "Gt", TERMSYM_GT },
131 { "Lt", TERMSYM_LT },
132
133 { "aa", TERMSYM_ACUTE },
134 { "ga", TERMSYM_GRAVE },
135
136 { "en", TERMSYM_EN },
137 { "em", TERMSYM_EM },
138
139 { "Pi", TERMSYM_PI },
140 { NULL, 0 }
141 };
142
143 /* FIXME: abstract to dynamically-compiled table. */
144 static struct termsym termsym_ascii[TERMSYM_MAX] = {
145 { "]", 1 }, /* TERMSYM_RBRACK */
146 { "[", 1 }, /* TERMSYM_LBRACK */
147 { "<-", 2 }, /* TERMSYM_LARROW */
148 { "->", 2 }, /* TERMSYM_RARROW */
149 { "^", 1 }, /* TERMSYM_UARROW */
150 { "v", 1 }, /* TERMSYM_DARROW */
151 { "`", 1 }, /* TERMSYM_LSQUOTE */
152 { "\'", 1 }, /* TERMSYM_RSQUOTE */
153 { "\'", 1 }, /* TERMSYM_SQUOTE */
154 { "``", 2 }, /* TERMSYM_LDQUOTE */
155 { "\'\'", 2 }, /* TERMSYM_RDQUOTE */
156 { "\"", 1 }, /* TERMSYM_DQUOTE */
157 { "<", 1 }, /* TERMSYM_LT */
158 { ">", 1 }, /* TERMSYM_GT */
159 { "<=", 2 }, /* TERMSYM_LE */
160 { ">=", 2 }, /* TERMSYM_GE */
161 { "==", 2 }, /* TERMSYM_EQ */
162 { "!=", 2 }, /* TERMSYM_NEQ */
163 { "\'", 1 }, /* TERMSYM_ACUTE */
164 { "`", 1 }, /* TERMSYM_GRAVE */
165 { "pi", 2 }, /* TERMSYM_PI */
166 { "+=", 2 }, /* TERMSYM_PLUSMINUS */
167 { "oo", 2 }, /* TERMSYM_INF */
168 { "infinity", 8 }, /* TERMSYM_INF2 */
169 { "NaN", 3 }, /* TERMSYM_NAN */
170 { "|", 1 }, /* TERMSYM_BAR */
171 { "o", 1 }, /* TERMSYM_BULLET */
172 { "&", 1 }, /* TERMSYM_AMP */
173 { "--", 2 }, /* TERMSYM_EM */
174 { "-", 1 }, /* TERMSYM_EN */
175 { "(C)", 3 }, /* TERMSYM_COPY */
176 { "*", 1 }, /* TERMSYM_ASTERISK */
177 { "\\", 1 }, /* TERMSYM_SLASH */
178 { "-", 1 }, /* TERMSYM_HYPHEN */
179 { " ", 1 }, /* TERMSYM_SPACE */
180 { ".", 1 }, /* TERMSYM_PERIOD */
181 { "", 0 }, /* TERMSYM_BREAK */
182 { "<", 1 }, /* TERMSYM_LANGLE */
183 { ">", 1 }, /* TERMSYM_RANGLE */
184 { "{", 1 }, /* TERMSYM_LBRACE */
185 { "}", 1 }, /* TERMSYM_RBRACE */
186 };
187
188 int
189 main(int argc, char *argv[])
190 {
191 struct mmain *p;
192 const struct mdoc *mdoc;
193 struct nroffopt nroff;
194 struct termp termp;
195 int c;
196 char *in;
197
198 (void)memset(&termp, 0, sizeof(struct termp));
199 (void)memset(&nroff, 0, sizeof(struct nroffopt));
200
201 termp.maxrmargin = termp.rmargin = 78; /* FIXME */
202 termp.maxcols = 1024; /* FIXME */
203 termp.offset = termp.col = 0;
204 termp.flags = TERMP_NOSPACE;
205 termp.symtab = termsym_ascii;
206 termp.enc = TERMENC_NROFF;
207
208 nroff.termp = &termp;
209
210 p = mmain_alloc();
211
212 c = mmain_getopt(p, argc, argv, "[-Ooption...]",
213 "[infile]", "him:n:o:r:T:O:", &nroff, option);
214
215 /* FIXME: this needs to accept multiple outputs. */
216 argv += c;
217 if ((argc -= c) > 0)
218 in = *argv++;
219 else
220 in = "-";
221
222 mmain_prepare(p, in);
223
224 if (NULL == (mdoc = mmain_process(p))) {
225 if (TERMP_NOPUNT & termp.iflags)
226 mmain_exit(p, 1);
227 mmain_free(p);
228 punt(&nroff, in);
229 /* NOTREACHED */
230 }
231
232 if (NULL == (termp.buf = malloc(termp.maxcols)))
233 err(1, "malloc");
234
235 header(&termp, mdoc_meta(mdoc));
236 body(&termp, NULL, mdoc_meta(mdoc), mdoc_node(mdoc));
237 footer(&termp, mdoc_meta(mdoc));
238
239 free(termp.buf);
240
241 mmain_exit(p, 0);
242 /* NOTREACHED */
243 }
244
245
246 static int
247 optsopt(struct termp *p, char *arg)
248 {
249 char *v;
250 char *toks[] = { "ansi", "nopunt", NULL };
251
252 while (*arg)
253 switch (getsubopt(&arg, toks, &v)) {
254 case (0):
255 p->enc = TERMENC_ANSI;
256 break;
257 case (2):
258 p->iflags |= TERMP_NOPUNT;
259 break;
260 default:
261 warnx("unknown -O argument");
262 return(0);
263 }
264
265 return(1);
266 }
267
268
269 static int
270 option(void *ptr, int c, char *arg)
271 {
272 struct termp *termp;
273 struct nroffopt *nroff;
274
275 nroff = (struct nroffopt *)ptr;
276 termp = nroff->termp;
277
278 switch (c) {
279 case ('h'):
280 nroff->fl_h = 1;
281 break;
282 case ('i'):
283 nroff->fl_i = 1;
284 break;
285 case ('m'):
286 nroff->arg_m = arg;
287 break;
288 case ('n'):
289 nroff->arg_n = arg;
290 break;
291 case ('o'):
292 nroff->arg_o = arg;
293 break;
294 case ('r'):
295 nroff->arg_r = arg;
296 break;
297 case ('T'):
298 nroff->arg_T = arg;
299 break;
300 case ('O'):
301 return(optsopt(termp, arg));
302 default:
303 break;
304 }
305
306 return(1);
307 }
308
309
310 /*
311 * Flush a line of text. A "line" is loosely defined as being something
312 * that should be followed by a newline, regardless of whether it's
313 * broken apart by newlines getting there. A line can also be a
314 * fragment of a columnar list.
315 *
316 * Specifically, a line is whatever's in p->buf of length p->col, which
317 * is zeroed after this function returns.
318 *
319 * The variables TERMP_NOLPAD, TERMP_LITERAL and TERMP_NOBREAK are of
320 * critical importance here. Their behaviour follows:
321 *
322 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
323 * offset value. This is useful when doing columnar lists where the
324 * prior column has right-padded.
325 *
326 * - TERMP_NOBREAK: this is the most important and is used when making
327 * columns. In short: don't print a newline and instead pad to the
328 * right margin. Used in conjunction with TERMP_NOLPAD.
329 *
330 * In-line line breaking:
331 *
332 * If TERMP_NOBREAK is specified and the line overruns the right
333 * margin, it will break and pad-right to the right margin after
334 * writing. If maxrmargin is violated, it will break and continue
335 * writing from the right-margin, which will lead to the above
336 * scenario upon exit.
337 *
338 * Otherwise, the line will break at the right margin. Extremely long
339 * lines will cause the system to emit a warning (TODO: hyphenate, if
340 * possible).
341 */
342 void
343 flushln(struct termp *p)
344 {
345 size_t i, j, vsz, vis, maxvis, mmax, bp;
346
347 /*
348 * First, establish the maximum columns of "visible" content.
349 * This is usually the difference between the right-margin and
350 * an indentation, but can be, for tagged lists or columns, a
351 * small set of values.
352 */
353
354 assert(p->offset < p->rmargin);
355 maxvis = p->rmargin - p->offset;
356 mmax = p->maxrmargin - p->offset;
357 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
358 vis = 0;
359
360 /*
361 * If in the standard case (left-justified), then begin with our
362 * indentation, otherwise (columns, etc.) just start spitting
363 * out text.
364 */
365
366 if ( ! (p->flags & TERMP_NOLPAD))
367 /* LINTED */
368 for (j = 0; j < p->offset; j++)
369 putchar(' ');
370
371 for (i = 0; i < p->col; i++) {
372 /*
373 * Count up visible word characters. Control sequences
374 * (starting with the CSI) aren't counted. A space
375 * generates a non-printing word, which is valid (the
376 * space is printed according to regular spacing rules).
377 */
378
379 /* LINTED */
380 for (j = i, vsz = 0; j < p->col; j++) {
381 if (isspace((u_char)p->buf[j])) {
382 break;
383 } else if (27 == p->buf[j]) {
384 assert(TERMENC_ANSI == p->enc);
385 assert(j + 5 <= p->col);
386 j += 4;
387 } else if (8 == p->buf[j]) {
388 assert(TERMENC_NROFF == p->enc);
389 assert(j + 2 <= p->col);
390 j += 1;
391 } else
392 vsz++;
393 }
394
395 /*
396 * Do line-breaking. If we're greater than our
397 * break-point and already in-line, break to the next
398 * line and start writing. If we're at the line start,
399 * then write out the word (TODO: hyphenate) and break
400 * in a subsequent loop invocation.
401 */
402
403 if ( ! (TERMP_NOBREAK & p->flags)) {
404 if (vis && vis + vsz > bp) {
405 putchar('\n');
406 for (j = 0; j < p->offset; j++)
407 putchar(' ');
408 vis = 0;
409 } else if (vis + vsz > bp)
410 warnx("word breaks right margin");
411
412 /* TODO: hyphenate. */
413
414 } else {
415 if (vis && vis + vsz > bp) {
416 putchar('\n');
417 for (j = 0; j < p->rmargin; j++)
418 putchar(' ');
419 vis = p->rmargin - p->offset;
420 } else if (vis + vsz > bp)
421 warnx("word breaks right margin");
422
423 /* TODO: hyphenate. */
424 }
425
426 /*
427 * Write out the word and a trailing space. Omit the
428 * space if we're the last word in the line or beyond
429 * our breakpoint.
430 */
431
432 for ( ; i < p->col; i++) {
433 if (isspace((u_char)p->buf[i]))
434 break;
435 putchar(p->buf[i]);
436 }
437 vis += vsz;
438 if (i < p->col && vis <= bp) {
439 putchar(' ');
440 vis++;
441 }
442 }
443
444 /*
445 * If we've overstepped our maximum visible no-break space, then
446 * cause a newline and offset at the right margin.
447 */
448
449 if ((TERMP_NOBREAK & p->flags) && vis >= maxvis) {
450 if ( ! (TERMP_NONOBREAK & p->flags)) {
451 putchar('\n');
452 for (i = 0; i < p->rmargin; i++)
453 putchar(' ');
454 }
455 p->col = 0;
456 return;
457 }
458
459 /*
460 * If we're not to right-marginalise it (newline), then instead
461 * pad to the right margin and stay off.
462 */
463
464 if (p->flags & TERMP_NOBREAK) {
465 if ( ! (TERMP_NONOBREAK & p->flags))
466 for ( ; vis < maxvis; vis++)
467 putchar(' ');
468 } else
469 putchar('\n');
470
471 p->col = 0;
472 }
473
474
475 /*
476 * A newline only breaks an existing line; it won't assert vertical
477 * space. All data in the output buffer is flushed prior to the newline
478 * assertion.
479 */
480 void
481 newln(struct termp *p)
482 {
483
484 p->flags |= TERMP_NOSPACE;
485 if (0 == p->col) {
486 p->flags &= ~TERMP_NOLPAD;
487 return;
488 }
489 flushln(p);
490 p->flags &= ~TERMP_NOLPAD;
491 }
492
493
/*
 * Produce one blank line: finish whatever is pending through newln()
 * and then write a bare newline.  Each call adds another blank line.
 */
void
vspace(struct termp *p)
{

	newln(p);
	(void)putchar('\n');
}
507
508
509 /*
510 * Break apart a word into "pwords" (partial-words, usually from
511 * breaking up a phrase into individual words) and, eventually, put them
512 * into the output buffer. If we're a literal word, then don't break up
513 * the word and put it verbatim into the output buffer.
514 */
515 void
516 word(struct termp *p, const char *word)
517 {
518 size_t i, j, len;
519
520 if (p->flags & TERMP_LITERAL) {
521 pword(p, word, strlen(word));
522 return;
523 }
524
525 if (0 == (len = strlen(word)))
526 errx(1, "blank line not in literal context");
527
528 if (mdoc_isdelim(word)) {
529 if ( ! (p->flags & TERMP_IGNDELIM))
530 p->flags |= TERMP_NOSPACE;
531 p->flags &= ~TERMP_IGNDELIM;
532 }
533
534 /* LINTED */
535 for (j = i = 0; i < len; i++) {
536 if ( ! isspace((u_char)word[i])) {
537 j++;
538 continue;
539 }
540
541 /* Escaped spaces don't delimit... */
542 if (i > 0 && isspace((u_char)word[i]) &&
543 '\\' == word[i - 1]) {
544 j++;
545 continue;
546 }
547
548 if (0 == j)
549 continue;
550 assert(i >= j);
551 pword(p, &word[i - j], j);
552 j = 0;
553 }
554 if (j > 0) {
555 assert(i >= j);
556 pword(p, &word[i - j], j);
557 }
558 }
559
560
561 /*
562 * This is the main function for printing out nodes. It's constituted
563 * of PRE and POST functions, which correspond to prefix and infix
564 * processing. The termpair structure allows data to persist between
565 * prefix and postfix invocations.
566 */
567 static void
568 body(struct termp *p, struct termpair *ppair,
569 const struct mdoc_meta *meta,
570 const struct mdoc_node *node)
571 {
572 int dochild;
573 struct termpair pair;
574
575 /* Some quick sanity-checking. */
576
577 sanity(node);
578
579 /* Pre-processing. */
580
581 dochild = 1;
582 pair.ppair = ppair;
583 pair.type = 0;
584 pair.offset = pair.rmargin = 0;
585 pair.flag = 0;
586 pair.count = 0;
587
588 if (MDOC_TEXT != node->type) {
589 if (termacts[node->tok].pre)
590 if ( ! (*termacts[node->tok].pre)(p, &pair, meta, node))
591 dochild = 0;
592 } else /* MDOC_TEXT == node->type */
593 word(p, node->string);
594
595 /* Children. */
596
597 if (TERMPAIR_FLAG & pair.type)
598 p->flags |= pair.flag;
599
600 if (dochild && node->child)
601 body(p, &pair, meta, node->child);
602
603 if (TERMPAIR_FLAG & pair.type)
604 p->flags &= ~pair.flag;
605
606 /* Post-processing. */
607
608 if (MDOC_TEXT != node->type)
609 if (termacts[node->tok].post)
610 (*termacts[node->tok].post)(p, &pair, meta, node);
611
612 /* Siblings. */
613
614 if (node->next)
615 body(p, ppair, meta, node->next);
616 }
617
618
619 static void
620 footer(struct termp *p, const struct mdoc_meta *meta)
621 {
622 struct tm *tm;
623 char *buf, *os;
624
625 if (NULL == (buf = malloc(p->rmargin)))
626 err(1, "malloc");
627 if (NULL == (os = malloc(p->rmargin)))
628 err(1, "malloc");
629
630 tm = localtime(&meta->date);
631
632 #ifdef __OpenBSD__
633 if (NULL == strftime(buf, p->rmargin, "%B %d, %Y", tm))
634 #else
635 if (0 == strftime(buf, p->rmargin, "%B %d, %Y", tm))
636 #endif
637 err(1, "strftime");
638
639 (void)strlcpy(os, meta->os, p->rmargin);
640
641 /*
642 * This is /slightly/ different from regular groff output
643 * because we don't have page numbers. Print the following:
644 *
645 * OS MDOCDATE
646 */
647
648 vspace(p);
649
650 p->flags |= TERMP_NOSPACE | TERMP_NOBREAK;
651 p->rmargin = p->maxrmargin - strlen(buf);
652 p->offset = 0;
653
654 word(p, os);
655 flushln(p);
656
657 p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
658 p->offset = p->rmargin;
659 p->rmargin = p->maxrmargin;
660 p->flags &= ~TERMP_NOBREAK;
661
662 word(p, buf);
663 flushln(p);
664
665 free(buf);
666 free(os);
667 }
668
669
670 static void
671 header(struct termp *p, const struct mdoc_meta *meta)
672 {
673 char *buf, *title, *bufp;
674
675 p->rmargin = p->maxrmargin;
676 p->offset = 0;
677
678 if (NULL == (buf = malloc(p->rmargin)))
679 err(1, "malloc");
680 if (NULL == (title = malloc(p->rmargin)))
681 err(1, "malloc");
682
683 /*
684 * The header is strange. It has three components, which are
685 * really two with the first duplicated. It goes like this:
686 *
687 * IDENTIFIER TITLE IDENTIFIER
688 *
689 * The IDENTIFIER is NAME(SECTION), which is the command-name
690 * (if given, or "unknown" if not) followed by the manual page
691 * section. These are given in `Dt'. The TITLE is a free-form
692 * string depending on the manual volume. If not specified, it
693 * switches on the manual section.
694 */
695
696 assert(meta->vol);
697 (void)strlcpy(buf, meta->vol, p->rmargin);
698
699 if (meta->arch) {
700 (void)strlcat(buf, " (", p->rmargin);
701 (void)strlcat(buf, meta->arch, p->rmargin);
702 (void)strlcat(buf, ")", p->rmargin);
703 }
704
705 (void)snprintf(title, p->rmargin, "%s(%d)",
706 meta->title, meta->msec);
707
708 for (bufp = title; *bufp; bufp++)
709 *bufp = toupper((u_char)*bufp);
710
711 p->offset = 0;
712 p->rmargin = (p->maxrmargin - strlen(buf)) / 2;
713 p->flags |= TERMP_NOBREAK | TERMP_NOSPACE;
714
715 word(p, title);
716 flushln(p);
717
718 p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
719 p->offset = p->rmargin;
720 p->rmargin = p->maxrmargin - strlen(title);
721
722 word(p, buf);
723 flushln(p);
724
725 p->offset = p->rmargin;
726 p->rmargin = p->maxrmargin;
727 p->flags &= ~TERMP_NOBREAK;
728 p->flags |= TERMP_NOLPAD | TERMP_NOSPACE;
729
730 word(p, title);
731 flushln(p);
732
733 p->rmargin = p->maxrmargin;
734 p->offset = 0;
735 p->flags &= ~TERMP_NOSPACE;
736
737 free(title);
738 free(buf);
739 }
740
741
742 /*
743 * Determine the symbol indicated by an escape sequences, that is, one
744 * starting with a backslash. Once done, we pass this value into the
745 * output buffer by way of the symbol table.
746 */
747 static void
748 nescape(struct termp *p, const char *word, size_t len)
749 {
750 struct termseq *enc;
751
752 switch (len) {
753 case (1):
754 enc = termenc1;
755 break;
756 case (2):
757 enc = termenc2;
758 break;
759 default:
760 warnx("unsupported %zu-byte escape sequence", len);
761 return;
762 }
763
764 for ( ; enc->enc; enc++)
765 if (0 == memcmp(enc->enc, word, len)) {
766 symbola(p, enc->sym);
767 return;
768 }
769
770 warnx("unsupported %zu-byte escape sequence", len);
771 }
772
773
/*
 * Handle the two-character form that follows a `(': on entry *i
 * indexes the `('; on success the two-byte name has been looked up
 * and *i indexes its second byte.  A truncated name is reported and
 * skipped.
 */
static void
pescape_pair(struct termp *p, const char *word, size_t *i, size_t len)
{

	(*i)++;
	if (*i + 1 >= len) {
		warnx("ignoring bad escape sequence");
		return;
	}
	nescape(p, &word[*i], 2);
	(*i)++;
}


/*
 * Handle an escape sequence: determine the extent of its name and pass
 * the name to the escape-symbol lookup, nescape().
 *
 * On entry word[*i] is the backslash; `len' is the length of the
 * partial word, which may be shorter than the underlying string.  On
 * return *i indexes the last byte consumed, so that the caller's loop
 * increment steps past the sequence.  Recognised forms:
 *
 *	\X  \*X		one-byte name
 *	\(XX  \*(XX	two-byte name
 *	\[...]  \*[...]	bracketed name of arbitrary length
 *
 * Badly-formed sequences are not fatal: the warning "ignoring bad
 * escape sequence" is printed and nothing is output.
 *
 * The scan for the closing bracket is bounded by `len', so it cannot
 * wander out of the partial word into whatever follows it in the
 * original string.
 */
static void
pescape(struct termp *p, const char *word, size_t *i, size_t len)
{
	size_t		 j;

	if (++(*i) >= len) {
		warnx("ignoring bad escape sequence");
		return;
	}

	if ('(' == word[*i]) {
		pescape_pair(p, word, i, len);
		return;

	} else if ('*' == word[*i]) {
		(*i)++;
		if (*i >= len) {
			warnx("ignoring bad escape sequence");
			return;
		}
		switch (word[*i]) {
		case ('('):
			pescape_pair(p, word, i, len);
			return;
		case ('['):
			break;
		default:
			nescape(p, &word[*i], 1);
			return;
		}

	} else if ('[' != word[*i]) {
		nescape(p, &word[*i], 1);
		return;
	}

	/* Bracketed name: collect everything up to the closing `]'. */

	(*i)++;
	for (j = 0; *i < len && word[*i] && ']' != word[*i]; (*i)++, j++)
		/* Loop... */ ;

	if (*i >= len || 0 == word[*i]) {
		warnx("ignoring bad escape sequence");
		return;
	}
	nescape(p, &word[*i - j], j);
}
837
838
839 /*
840 * Handle pwords, partial words, which may be either a single word or a
841 * phrase that cannot be broken down (such as a literal string). This
842 * handles word styling.
843 */
844 static void
845 pword(struct termp *p, const char *word, size_t len)
846 {
847 size_t i;
848
849 if ( ! (TERMP_NOSPACE & p->flags) &&
850 ! (TERMP_LITERAL & p->flags))
851 chara(p, ' ');
852
853 if ( ! (p->flags & TERMP_NONOSPACE))
854 p->flags &= ~TERMP_NOSPACE;
855
856 /*
857 * If ANSI (word-length styling), then apply our style now,
858 * before the word.
859 */
860
861 if (TERMENC_ANSI == p->enc && TERMP_STYLE & p->flags) {
862 if (TERMP_BOLD & p->flags) {
863 chara(p, 27);
864 stringa(p, "[01m", 4);
865 }
866 if (TERMP_UNDER & p->flags) {
867 chara(p, 27);
868 stringa(p, "[04m", 4);
869 }
870 if (TERMP_RED & p->flags) {
871 chara(p, 27);
872 stringa(p, "[31m", 4);
873 }
874 if (TERMP_GREEN & p->flags) {
875 chara(p, 27);
876 stringa(p, "[32m", 4);
877 }
878 if (TERMP_YELLOW & p->flags) {
879 chara(p, 27);
880 stringa(p, "[33m", 4);
881 }
882 if (TERMP_BLUE & p->flags) {
883 chara(p, 27);
884 stringa(p, "[34m", 4);
885 }
886 if (TERMP_MAGENTA & p->flags) {
887 chara(p, 27);
888 stringa(p, "[35m", 4);
889 }
890 if (TERMP_CYAN & p->flags) {
891 chara(p, 27);
892 stringa(p, "[36m", 4);
893 }
894 }
895
896 for (i = 0; i < len; i++) {
897 if ('\\' == word[i]) {
898 pescape(p, word, &i, len);
899 continue;
900 }
901
902 if (TERMENC_NROFF == p->enc &&
903 TERMP_STYLE & p->flags) {
904 if (TERMP_BOLD & p->flags) {
905 chara(p, word[i]);
906 chara(p, 8);
907 }
908 if (TERMP_UNDER & p->flags) {
909 chara(p, '_');
910 chara(p, 8);
911 }
912 }
913
914 chara(p, word[i]);
915 }
916
917 if (TERMENC_ANSI == p->enc && TERMP_STYLE & p->flags) {
918 chara(p, 27);
919 stringa(p, "[00m", 4);
920 }
921 }
922
923
924 /*
925 * Add a symbol to the output line buffer.
926 */
927 static void
928 symbola(struct termp *p, enum tsym sym)
929 {
930
931 assert(p->symtab[sym].sym);
932 stringa(p, p->symtab[sym].sym, p->symtab[sym].sz);
933 }
934
935
936 /*
937 * Like chara() but for arbitrary-length buffers. Resize the buffer by
938 * a factor of two (if the buffer is less than that) or the buffer's
939 * size.
940 */
941 static void
942 stringa(struct termp *p, const char *c, size_t sz)
943 {
944 size_t s;
945
946 if (0 == sz)
947 return;
948
949 s = sz > p->maxcols * 2 ? sz : p->maxcols * 2;
950
951 assert(c);
952 if (p->col + sz >= p->maxcols) {
953 p->buf = realloc(p->buf, s);
954 if (NULL == p->buf)
955 err(1, "realloc");
956 p->maxcols = s;
957 }
958
959 (void)memcpy(&p->buf[p->col], c, sz);
960 p->col += sz;
961 }
962
963
964 /*
965 * Insert a single character into the line-buffer. If the buffer's
966 * space is exceeded, then allocate more space by doubling the buffer
967 * size.
968 */
969 static void
970 chara(struct termp *p, char c)
971 {
972
973 if (p->col + 1 >= p->maxcols) {
974 p->buf = realloc(p->buf, p->maxcols * 2);
975 if (NULL == p->buf)
976 err(1, "malloc");
977 p->maxcols *= 2;
978 }
979 p->buf[(p->col)++] = c;
980 }
981
982
983 static void
984 sanity(const struct mdoc_node *n)
985 {
986
987 switch (n->type) {
988 case (MDOC_TEXT):
989 if (n->child)
990 errx(1, "regular form violated (1)");
991 if (NULL == n->parent)
992 errx(1, "regular form violated (2)");
993 if (NULL == n->string)
994 errx(1, "regular form violated (3)");
995 switch (n->parent->type) {
996 case (MDOC_TEXT):
997 /* FALLTHROUGH */
998 case (MDOC_ROOT):
999 errx(1, "regular form violated (4)");
1000 /* NOTREACHED */
1001 default:
1002 break;
1003 }
1004 break;
1005 case (MDOC_ELEM):
1006 if (NULL == n->parent)
1007 errx(1, "regular form violated (5)");
1008 switch (n->parent->type) {
1009 case (MDOC_TAIL):
1010 /* FALLTHROUGH */
1011 case (MDOC_BODY):
1012 /* FALLTHROUGH */
1013 case (MDOC_HEAD):
1014 break;
1015 default:
1016 errx(1, "regular form violated (6)");
1017 /* NOTREACHED */
1018 }
1019 if (n->child) switch (n->child->type) {
1020 case (MDOC_TEXT):
1021 break;
1022 default:
1023 errx(1, "regular form violated (7(");
1024 /* NOTREACHED */
1025 }
1026 break;
1027 case (MDOC_HEAD):
1028 /* FALLTHROUGH */
1029 case (MDOC_BODY):
1030 /* FALLTHROUGH */
1031 case (MDOC_TAIL):
1032 if (NULL == n->parent)
1033 errx(1, "regular form violated (8)");
1034 if (MDOC_BLOCK != n->parent->type)
1035 errx(1, "regular form violated (9)");
1036 if (n->child) switch (n->child->type) {
1037 case (MDOC_BLOCK):
1038 /* FALLTHROUGH */
1039 case (MDOC_ELEM):
1040 /* FALLTHROUGH */
1041 case (MDOC_TEXT):
1042 break;
1043 default:
1044 errx(1, "regular form violated (a)");
1045 /* NOTREACHED */
1046 }
1047 break;
1048 case (MDOC_BLOCK):
1049 if (NULL == n->parent)
1050 errx(1, "regular form violated (b)");
1051 if (NULL == n->child)
1052 errx(1, "regular form violated (c)");
1053 switch (n->parent->type) {
1054 case (MDOC_ROOT):
1055 /* FALLTHROUGH */
1056 case (MDOC_HEAD):
1057 /* FALLTHROUGH */
1058 case (MDOC_BODY):
1059 /* FALLTHROUGH */
1060 case (MDOC_TAIL):
1061 break;
1062 default:
1063 errx(1, "regular form violated (d)");
1064 /* NOTREACHED */
1065 }
1066 switch (n->child->type) {
1067 case (MDOC_ROOT):
1068 /* FALLTHROUGH */
1069 case (MDOC_ELEM):
1070 errx(1, "regular form violated (e)");
1071 /* NOTREACHED */
1072 default:
1073 break;
1074 }
1075 break;
1076 case (MDOC_ROOT):
1077 if (n->parent)
1078 errx(1, "regular form violated (f)");
1079 if (NULL == n->child)
1080 errx(1, "regular form violated (10)");
1081 switch (n->child->type) {
1082 case (MDOC_BLOCK):
1083 break;
1084 default:
1085 errx(1, "regular form violated (11)");
1086 /* NOTREACHED */
1087 }
1088 break;
1089 }
1090 }
1091
1092
1093 dead_pre void
1094 punt(struct nroffopt *nroff, char *in)
1095 {
1096 char *args[32];
1097 char arg0[32], argm[32];
1098 int i;
1099
1100 warnx("punting to nroff!");
1101
1102 i = 0;
1103
1104 (void)strlcpy(arg0, "nroff", 32);
1105 args[i++] = arg0;
1106
1107 if (nroff->fl_h)
1108 args[i++] = "-h";
1109 if (nroff->fl_i)
1110 args[i++] = "-i";
1111
1112 if (nroff->arg_m) {
1113 (void)strlcpy(argm, "-m", 32);
1114 (void)strlcat(argm, nroff->arg_m, 32);
1115 args[i++] = argm;
1116 } else
1117 args[i++] = "-mandoc";
1118
1119 args[i++] = in;
1120 args[i++] = (char *)NULL;
1121
1122 (void)execvp("nroff", args);
1123 errx(1, "exec");
1124 /* NOTREACHED */
1125 }
1126