]> git.cameronkatri.com Git - mandoc.git/blob - main.c
print_encode() using strcspn instead of looping/putchar() (noted by Joerg Sonnenberger).
[mandoc.git] / main.c
1 /* $Id: main.c,v 1.54 2009/10/31 06:17:19 kristaps Exp $ */
2 /*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17 #include <sys/stat.h>
18
19 #include <assert.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26
27 #include "mdoc.h"
28 #include "man.h"
29 #include "main.h"
30
/*
 * Drop the const qualifier from a pointer by round-tripping it through
 * uintptr_t.  Used below to hand the const token tables to getsubopt().
 */
#define	UNCONST(a)	((void *)(uintptr_t)(const void *)(a))
32
/*
 * Account for FreeBSD and Linux in our declarations.  On Linux,
 * getsubopt() and strlcat() are declared by hand and __dead is mapped
 * onto the compiler's noreturn attribute; where __dead2 is defined
 * instead, __dead becomes an alias for it.
 */

#ifdef __linux__
extern	int	  getsubopt(char **, char * const *, char **);
extern	size_t	  strlcat(char *, const char *, size_t);
# ifndef __dead
# define __dead __attribute__((__noreturn__))
# endif
#elif defined(__dead2)
# ifndef __dead
# define __dead __dead2
# endif
#endif
46
/*
 * Output-device callbacks.  The first argument of each is the device's
 * private data (curparse.outdata); out_mdoc and out_man additionally
 * receive the parsed document to emit, out_free releases the device.
 */
typedef	void		(*out_mdoc)(void *, const struct mdoc *);
typedef	void		(*out_man)(void *, const struct man *);
typedef	void		(*out_free)(void *);
50
/* A growable, heap-allocated byte buffer. */
struct	buf {
	char	 *buf;	/* Storage, or NULL before first allocation. */
	size_t	  sz;	/* Number of bytes allocated at buf. */
};
55
/* Input parser selection, set by the -m option (see moptions()). */
enum	intt {
	INTT_AUTO,	/* -mandoc: guess per file (see pset()). */
	INTT_MDOC,	/* -mdoc: always use the mdoc parser. */
	INTT_MAN	/* -man: always use the man parser. */
};
61
/* Output device selection, set by the -T option (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii: terminal output (the default). */
	OUTT_TREE,	/* -Ttree: tree output. */
	OUTT_HTML,	/* -Thtml: HTML output. */
	OUTT_LINT	/* -Tlint: parse only, no output device. */
};
68
/*
 * State shared across all files of one invocation: the file currently
 * being parsed, the option flags, the lazily-allocated parsers and the
 * output device.  A pointer to this structure is also what the parsers
 * hand back to the merr()/mwarn() callbacks.
 */
struct	curparse {
	const char	 *file;		/* Current parse. */
	int		  fd;		/* Current parse. */
	int		  wflags;	/* -W option bits. */
#define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
#define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
	int		  fflags;	/* -f option bits. */
#define	IGN_SCOPE	 (1 << 0)	/* Ignore scope errors. */
#define	NO_IGN_ESCAPE	 (1 << 1)	/* Don't ignore bad escapes. */
#define	NO_IGN_MACRO	 (1 << 2)	/* Don't ignore bad macros. */
#define	NO_IGN_CHARS	 (1 << 3)	/* Don't ignore bad chars. */
#define	IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
	enum intt	  inttype;	/* Input parsers... */
	struct man	 *man;		/* man parser, allocated on demand. */
	struct man	 *lastman;	/* man parser used by the last file. */
	struct mdoc	 *mdoc;		/* mdoc parser, allocated on demand. */
	struct mdoc	 *lastmdoc;	/* mdoc parser used by the last file. */
	enum outt	  outtype;	/* Output devices... */
	out_mdoc	  outmdoc;	/* Emits a parsed mdoc document. */
	out_man		  outman;	/* Emits a parsed man document. */
	out_free	  outfree;	/* Releases outdata, if set. */
	void		 *outdata;	/* Private data of the output device. */
	char		  outopts[BUFSIZ]; /* Comma-joined -O arguments. */
};
93
/* Forward declarations of the file-local helpers defined below. */
static	int		  foptions(int *, char *);
static	int		  toptions(enum outt *, char *);
static	int		  moptions(enum intt *, char *);
static	int		  woptions(int *, char *);
static	int		  merr(void *, int, int, const char *);
static	int		  mwarn(void *, int, int, const char *);
static	int		  ffile(struct buf *, struct buf *,
				const char *, struct curparse *);
static	int		  fdesc(struct buf *, struct buf *,
				struct curparse *);
static	int		  pset(const char *, int, struct curparse *,
				struct man **, struct mdoc **);
static	struct man	 *man_init(struct curparse *);
static	struct mdoc	 *mdoc_init(struct curparse *);
__dead	static void	  version(void);
__dead	static void	  usage(void);
110
/* Basename of argv[0]; set in main(), printed by version() and usage(). */
static	const char	 *progname;
112
113
114 int
115 main(int argc, char *argv[])
116 {
117 int c, rc;
118 struct buf ln, blk;
119 struct curparse curp;
120
121 progname = strrchr(argv[0], '/');
122 if (progname == NULL)
123 progname = argv[0];
124 else
125 ++progname;
126
127 memset(&curp, 0, sizeof(struct curparse));
128
129 curp.inttype = INTT_AUTO;
130 curp.outtype = OUTT_ASCII;
131
132 /* LINTED */
133 while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
134 switch (c) {
135 case ('f'):
136 if ( ! foptions(&curp.fflags, optarg))
137 return(EXIT_FAILURE);
138 break;
139 case ('m'):
140 if ( ! moptions(&curp.inttype, optarg))
141 return(EXIT_FAILURE);
142 break;
143 case ('O'):
144 (void)strlcat(curp.outopts, optarg, BUFSIZ);
145 (void)strlcat(curp.outopts, ",", BUFSIZ);
146 break;
147 case ('T'):
148 if ( ! toptions(&curp.outtype, optarg))
149 return(EXIT_FAILURE);
150 break;
151 case ('W'):
152 if ( ! woptions(&curp.wflags, optarg))
153 return(EXIT_FAILURE);
154 break;
155 case ('V'):
156 version();
157 /* NOTREACHED */
158 default:
159 usage();
160 /* NOTREACHED */
161 }
162
163 argc -= optind;
164 argv += optind;
165
166 memset(&ln, 0, sizeof(struct buf));
167 memset(&blk, 0, sizeof(struct buf));
168
169 rc = 1;
170
171 if (NULL == *argv) {
172 curp.file = "<stdin>";
173 curp.fd = STDIN_FILENO;
174
175 c = fdesc(&blk, &ln, &curp);
176 if ( ! (IGN_ERRORS & curp.fflags))
177 rc = 1 == c ? 1 : 0;
178 else
179 rc = -1 == c ? 0 : 1;
180 }
181
182 while (rc && *argv) {
183 c = ffile(&blk, &ln, *argv, &curp);
184 if ( ! (IGN_ERRORS & curp.fflags))
185 rc = 1 == c ? 1 : 0;
186 else
187 rc = -1 == c ? 0 : 1;
188
189 argv++;
190 if (*argv && rc) {
191 if (curp.lastman)
192 man_reset(curp.lastman);
193 if (curp.lastmdoc)
194 mdoc_reset(curp.lastmdoc);
195 curp.lastman = NULL;
196 curp.lastmdoc = NULL;
197 }
198 }
199
200 if (blk.buf)
201 free(blk.buf);
202 if (ln.buf)
203 free(ln.buf);
204 if (curp.outfree)
205 (*curp.outfree)(curp.outdata);
206 if (curp.mdoc)
207 mdoc_free(curp.mdoc);
208 if (curp.man)
209 man_free(curp.man);
210
211 return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
212 }
213
214
215 __dead static void
216 version(void)
217 {
218
219 (void)printf("%s %s\n", progname, VERSION);
220 exit(EXIT_SUCCESS);
221 }
222
223
224 __dead static void
225 usage(void)
226 {
227
228 (void)fprintf(stderr, "usage: %s [-V] [-foption...] "
229 "[-mformat] [-Ooption] [-Toutput] "
230 "[-Werr...]\n", progname);
231 exit(EXIT_FAILURE);
232 }
233
234
235 static struct man *
236 man_init(struct curparse *curp)
237 {
238 int pflags;
239 struct man_cb mancb;
240
241 mancb.man_err = merr;
242 mancb.man_warn = mwarn;
243
244 /* Defaults from mandoc.1. */
245
246 pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE | MAN_IGN_CHARS;
247
248 if (curp->fflags & NO_IGN_MACRO)
249 pflags &= ~MAN_IGN_MACRO;
250 if (curp->fflags & NO_IGN_CHARS)
251 pflags &= ~MAN_IGN_CHARS;
252 if (curp->fflags & NO_IGN_ESCAPE)
253 pflags &= ~MAN_IGN_ESCAPE;
254
255 return(man_alloc(curp, pflags, &mancb));
256 }
257
258
259 static struct mdoc *
260 mdoc_init(struct curparse *curp)
261 {
262 int pflags;
263 struct mdoc_cb mdoccb;
264
265 mdoccb.mdoc_err = merr;
266 mdoccb.mdoc_warn = mwarn;
267
268 /* Defaults from mandoc.1. */
269
270 pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE | MDOC_IGN_CHARS;
271
272 if (curp->fflags & IGN_SCOPE)
273 pflags |= MDOC_IGN_SCOPE;
274 if (curp->fflags & NO_IGN_ESCAPE)
275 pflags &= ~MDOC_IGN_ESCAPE;
276 if (curp->fflags & NO_IGN_MACRO)
277 pflags &= ~MDOC_IGN_MACRO;
278 if (curp->fflags & NO_IGN_CHARS)
279 pflags &= ~MDOC_IGN_CHARS;
280
281 return(mdoc_alloc(curp, pflags, &mdoccb));
282 }
283
284
285 static int
286 ffile(struct buf *blk, struct buf *ln,
287 const char *file, struct curparse *curp)
288 {
289 int c;
290
291 curp->file = file;
292 if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
293 perror(curp->file);
294 return(-1);
295 }
296
297 c = fdesc(blk, ln, curp);
298
299 if (-1 == close(curp->fd))
300 perror(curp->file);
301
302 return(c);
303 }
304
305
306 static int
307 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
308 {
309 size_t sz;
310 ssize_t ssz;
311 struct stat st;
312 int j, i, pos, lnn, comment;
313 struct man *man;
314 struct mdoc *mdoc;
315
316 sz = BUFSIZ;
317 man = NULL;
318 mdoc = NULL;
319
320 /*
321 * Two buffers: ln and buf. buf is the input buffer optimised
322 * here for each file's block size. ln is a line buffer. Both
323 * growable, hence passed in by ptr-ptr.
324 */
325
326 if (-1 == fstat(curp->fd, &st))
327 perror(curp->file);
328 else if ((size_t)st.st_blksize > sz)
329 sz = st.st_blksize;
330
331 if (sz > blk->sz) {
332 blk->buf = realloc(blk->buf, sz);
333 if (NULL == blk->buf) {
334 perror(NULL);
335 exit(EXIT_FAILURE);
336 }
337 blk->sz = sz;
338 }
339
340 /* Fill buf with file blocksize. */
341
342 for (lnn = pos = comment = 0; ; ) {
343 if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
344 perror(curp->file);
345 return(-1);
346 } else if (0 == ssz)
347 break;
348
349 /* Parse the read block into partial or full lines. */
350
351 for (i = 0; i < (int)ssz; i++) {
352 if (pos >= (int)ln->sz) {
353 ln->sz += 256; /* Step-size. */
354 ln->buf = realloc(ln->buf, ln->sz);
355 if (NULL == ln->buf) {
356 perror(NULL);
357 return(EXIT_FAILURE);
358 }
359 }
360
361 if ('\n' != blk->buf[i]) {
362 if (comment)
363 continue;
364 ln->buf[pos++] = blk->buf[i];
365
366 /* Handle in-line `\"' comments. */
367
368 if (1 == pos || '\"' != ln->buf[pos - 1])
369 continue;
370
371 for (j = pos - 2; j >= 0; j--)
372 if ('\\' != ln->buf[j])
373 break;
374
375 if ( ! ((pos - 2 - j) % 2))
376 continue;
377
378 comment = 1;
379 pos -= 2;
380 continue;
381 }
382
383 /* Handle escaped `\\n' newlines. */
384
385 if (pos > 0 && 0 == comment &&
386 '\\' == ln->buf[pos - 1]) {
387 for (j = pos - 1; j >= 0; j--)
388 if ('\\' != ln->buf[j])
389 break;
390 if ( ! ((pos - j) % 2)) {
391 pos--;
392 lnn++;
393 continue;
394 }
395 }
396
397 ln->buf[pos] = 0;
398 lnn++;
399
400 /* If unset, assign parser in pset(). */
401
402 if ( ! (man || mdoc) && ! pset(ln->buf,
403 pos, curp, &man, &mdoc))
404 return(-1);
405
406 pos = comment = 0;
407
408 /* Pass down into parsers. */
409
410 if (man && ! man_parseln(man, lnn, ln->buf))
411 return(0);
412 if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
413 return(0);
414 }
415 }
416
417 /* NOTE a parser may not have been assigned, yet. */
418
419 if ( ! (man || mdoc)) {
420 fprintf(stderr, "%s: Not a manual\n", curp->file);
421 return(0);
422 }
423
424 if (mdoc && ! mdoc_endparse(mdoc))
425 return(0);
426 if (man && ! man_endparse(man))
427 return(0);
428
429 /* If unset, allocate output dev now (if applicable). */
430
431 if ( ! (curp->outman && curp->outmdoc)) {
432 switch (curp->outtype) {
433 case (OUTT_HTML):
434 curp->outdata = html_alloc(curp->outopts);
435 curp->outman = html_man;
436 curp->outmdoc = html_mdoc;
437 curp->outfree = html_free;
438 break;
439 case (OUTT_TREE):
440 curp->outman = tree_man;
441 curp->outmdoc = tree_mdoc;
442 break;
443 case (OUTT_LINT):
444 break;
445 default:
446 curp->outdata = ascii_alloc();
447 curp->outman = terminal_man;
448 curp->outmdoc = terminal_mdoc;
449 curp->outfree = terminal_free;
450 break;
451 }
452 }
453
454 /* Execute the out device, if it exists. */
455
456 if (man && curp->outman)
457 (*curp->outman)(curp->outdata, man);
458 if (mdoc && curp->outmdoc)
459 (*curp->outmdoc)(curp->outdata, mdoc);
460
461 return(1);
462 }
463
464
465 static int
466 pset(const char *buf, int pos, struct curparse *curp,
467 struct man **man, struct mdoc **mdoc)
468 {
469 int i;
470
471 /*
472 * Try to intuit which kind of manual parser should be used. If
473 * passed in by command-line (-man, -mdoc), then use that
474 * explicitly. If passed as -mandoc, then try to guess from the
475 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
476 * default to -man, which is more lenient.
477 */
478
479 if (buf[0] == '.') {
480 for (i = 1; buf[i]; i++)
481 if (' ' != buf[i] && '\t' != buf[i])
482 break;
483 if (0 == buf[i])
484 return(1);
485 }
486
487 switch (curp->inttype) {
488 case (INTT_MDOC):
489 if (NULL == curp->mdoc)
490 curp->mdoc = mdoc_init(curp);
491 if (NULL == (*mdoc = curp->mdoc))
492 return(0);
493 curp->lastmdoc = *mdoc;
494 return(1);
495 case (INTT_MAN):
496 if (NULL == curp->man)
497 curp->man = man_init(curp);
498 if (NULL == (*man = curp->man))
499 return(0);
500 curp->lastman = *man;
501 return(1);
502 default:
503 break;
504 }
505
506 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
507 if (NULL == curp->mdoc)
508 curp->mdoc = mdoc_init(curp);
509 if (NULL == (*mdoc = curp->mdoc))
510 return(0);
511 curp->lastmdoc = *mdoc;
512 return(1);
513 }
514
515 if (NULL == curp->man)
516 curp->man = man_init(curp);
517 if (NULL == (*man = curp->man))
518 return(0);
519 curp->lastman = *man;
520 return(1);
521 }
522
523
524 static int
525 moptions(enum intt *tflags, char *arg)
526 {
527
528 if (0 == strcmp(arg, "doc"))
529 *tflags = INTT_MDOC;
530 else if (0 == strcmp(arg, "andoc"))
531 *tflags = INTT_AUTO;
532 else if (0 == strcmp(arg, "an"))
533 *tflags = INTT_MAN;
534 else {
535 fprintf(stderr, "%s: Bad argument", arg);
536 return(0);
537 }
538
539 return(1);
540 }
541
542
543 static int
544 toptions(enum outt *tflags, char *arg)
545 {
546
547 if (0 == strcmp(arg, "ascii"))
548 *tflags = OUTT_ASCII;
549 else if (0 == strcmp(arg, "lint"))
550 *tflags = OUTT_LINT;
551 else if (0 == strcmp(arg, "tree"))
552 *tflags = OUTT_TREE;
553 else if (0 == strcmp(arg, "html"))
554 *tflags = OUTT_HTML;
555 else {
556 fprintf(stderr, "%s: Bad argument", arg);
557 return(0);
558 }
559
560 return(1);
561 }
562
563
564 static int
565 foptions(int *fflags, char *arg)
566 {
567 char *v, *o;
568 const char *toks[8];
569
570 toks[0] = "ign-scope";
571 toks[1] = "no-ign-escape";
572 toks[2] = "no-ign-macro";
573 toks[3] = "no-ign-chars";
574 toks[4] = "ign-errors";
575 toks[5] = "strict";
576 toks[6] = "ign-escape";
577 toks[7] = NULL;
578
579 while (*arg) {
580 o = arg;
581 switch (getsubopt(&arg, UNCONST(toks), &v)) {
582 case (0):
583 *fflags |= IGN_SCOPE;
584 break;
585 case (1):
586 *fflags |= NO_IGN_ESCAPE;
587 break;
588 case (2):
589 *fflags |= NO_IGN_MACRO;
590 break;
591 case (3):
592 *fflags |= NO_IGN_CHARS;
593 break;
594 case (4):
595 *fflags |= IGN_ERRORS;
596 break;
597 case (5):
598 *fflags |= NO_IGN_ESCAPE |
599 NO_IGN_MACRO | NO_IGN_CHARS;
600 break;
601 case (6):
602 *fflags &= ~NO_IGN_ESCAPE;
603 break;
604 default:
605 fprintf(stderr, "%s: Bad argument", o);
606 return(0);
607 }
608 }
609
610 return(1);
611 }
612
613
614 static int
615 woptions(int *wflags, char *arg)
616 {
617 char *v, *o;
618 const char *toks[3];
619
620 toks[0] = "all";
621 toks[1] = "error";
622 toks[2] = NULL;
623
624 while (*arg) {
625 o = arg;
626 switch (getsubopt(&arg, UNCONST(toks), &v)) {
627 case (0):
628 *wflags |= WARN_WALL;
629 break;
630 case (1):
631 *wflags |= WARN_WERR;
632 break;
633 default:
634 fprintf(stderr, "%s: Bad argument", o);
635 return(0);
636 }
637 }
638
639 return(1);
640 }
641
642
643 /* ARGSUSED */
644 static int
645 merr(void *arg, int line, int col, const char *msg)
646 {
647 struct curparse *curp;
648
649 curp = (struct curparse *)arg;
650
651 (void)fprintf(stderr, "%s:%d:%d: error: %s\n",
652 curp->file, line, col + 1, msg);
653
654 return(0);
655 }
656
657
658 static int
659 mwarn(void *arg, int line, int col, const char *msg)
660 {
661 struct curparse *curp;
662
663 curp = (struct curparse *)arg;
664
665 if ( ! (curp->wflags & WARN_WALL))
666 return(1);
667
668 (void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
669 curp->file, line, col + 1, msg);
670
671 if ( ! (curp->wflags & WARN_WERR))
672 return(1);
673
674 return(0);
675 }
676