git.cameronkatri.com Git - mandoc.git/blob - man_validate.c
Simplify man(7) validation:
[mandoc.git] / man_validate.c
1 /* $Id: man_validate.c,v 1.104 2014/08/01 21:24:17 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32
33 #include "man.h"
34 #include "mandoc.h"
35 #include "mandoc_aux.h"
36 #include "libman.h"
37 #include "libmandoc.h"
38
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	struct man *man, struct man_node *n

/* Type of a validation handler, as stored in man_valids[]. */
typedef	int	(*v_check)(CHKARGS);

/* Child-count checks generated by INEQ_DEFINE(). */
static	int	check_eq0(CHKARGS);
static	int	check_eq2(CHKARGS);
static	int	check_ge2(CHKARGS);
static	int	check_le1(CHKARGS);
static	int	check_le5(CHKARGS);

/* Checks on node structure and text content. */
static	int	check_par(CHKARGS);
static	int	check_part(CHKARGS);
static	int	check_root(CHKARGS);
static	int	check_text(CHKARGS);

/* Handlers dedicated to individual macros. */
static	int	post_AT(CHKARGS);
static	int	post_fi(CHKARGS);
static	int	post_ft(CHKARGS);
static	int	post_IP(CHKARGS);
static	int	post_nf(CHKARGS);
static	int	post_TH(CHKARGS);
static	int	post_UC(CHKARGS);
static	int	post_UR(CHKARGS);
static	int	post_vs(CHKARGS);
62
63 static v_check man_valids[MAN_MAX] = {
64 post_vs, /* br */
65 post_TH, /* TH */
66 NULL, /* SH */
67 NULL, /* SS */
68 NULL, /* TP */
69 check_par, /* LP */
70 check_par, /* PP */
71 check_par, /* P */
72 post_IP, /* IP */
73 NULL, /* HP */
74 NULL, /* SM */
75 NULL, /* SB */
76 NULL, /* BI */
77 NULL, /* IB */
78 NULL, /* BR */
79 NULL, /* RB */
80 NULL, /* R */
81 NULL, /* B */
82 NULL, /* I */
83 NULL, /* IR */
84 NULL, /* RI */
85 check_eq0, /* na */
86 post_vs, /* sp */
87 post_nf, /* nf */
88 post_fi, /* fi */
89 NULL, /* RE */
90 check_part, /* RS */
91 NULL, /* DT */
92 post_UC, /* UC */
93 check_le1, /* PD */
94 post_AT, /* AT */
95 NULL, /* in */
96 post_ft, /* ft */
97 check_eq2, /* OP */
98 post_nf, /* EX */
99 post_fi, /* EE */
100 post_UR, /* UR */
101 NULL, /* UE */
102 NULL, /* ll */
103 };
104
105
106 int
107 man_valid_post(struct man *man)
108 {
109 struct man_node *n;
110 v_check *cp;
111
112 n = man->last;
113 if (n->flags & MAN_VALID)
114 return(1);
115 n->flags |= MAN_VALID;
116
117 switch (n->type) {
118 case MAN_TEXT:
119 return(check_text(man, n));
120 case MAN_ROOT:
121 return(check_root(man, n));
122 case MAN_EQN:
123 /* FALLTHROUGH */
124 case MAN_TBL:
125 return(1);
126 default:
127 cp = man_valids + n->tok;
128 return(*cp ? (*cp)(man, n) : 1);
129 }
130 }
131
132 static int
133 check_root(CHKARGS)
134 {
135
136 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
137
138 if (NULL == man->first->child)
139 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
140 n->line, n->pos, NULL);
141 else
142 man->meta.hasbody = 1;
143
144 if (NULL == man->meta.title) {
145 mandoc_msg(MANDOCERR_TH_MISSING, man->parse,
146 n->line, n->pos, NULL);
147
148 /*
149 * If a title hasn't been set, do so now (by
150 * implication, date and section also aren't set).
151 */
152
153 man->meta.title = mandoc_strdup("unknown");
154 man->meta.msec = mandoc_strdup("1");
155 man->meta.date = man->quick ? mandoc_strdup("") :
156 mandoc_normdate(man->parse, NULL, n->line, n->pos);
157 }
158
159 return(1);
160 }
161
162 static int
163 check_text(CHKARGS)
164 {
165 char *cp, *p;
166
167 if (MAN_LITERAL & man->flags)
168 return(1);
169
170 cp = n->string;
171 for (p = cp; NULL != (p = strchr(p, '\t')); p++)
172 mandoc_msg(MANDOCERR_FI_TAB, man->parse,
173 n->line, n->pos + (p - cp), NULL);
174 return(1);
175 }
176
177 #define INEQ_DEFINE(x, ineq, name) \
178 static int \
179 check_##name(CHKARGS) \
180 { \
181 if (n->nchild ineq (x)) \
182 return(1); \
183 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \
184 "line arguments %s %d (have %d)", \
185 #ineq, (x), n->nchild); \
186 return(1); \
187 }
188
189 INEQ_DEFINE(0, ==, eq0)
190 INEQ_DEFINE(2, ==, eq2)
191 INEQ_DEFINE(1, <=, le1)
192 INEQ_DEFINE(2, >=, ge2)
193 INEQ_DEFINE(5, <=, le5)
194
195 static int
196 post_UR(CHKARGS)
197 {
198
199 if (MAN_HEAD == n->type && 1 != n->nchild)
200 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
201 n->pos, "line arguments eq 1 (have %d)", n->nchild);
202
203 return(check_part(man, n));
204 }
205
206 static int
207 post_ft(CHKARGS)
208 {
209 char *cp;
210 int ok;
211
212 if (0 == n->nchild)
213 return(1);
214
215 ok = 0;
216 cp = n->child->string;
217 switch (*cp) {
218 case '1':
219 /* FALLTHROUGH */
220 case '2':
221 /* FALLTHROUGH */
222 case '3':
223 /* FALLTHROUGH */
224 case '4':
225 /* FALLTHROUGH */
226 case 'I':
227 /* FALLTHROUGH */
228 case 'P':
229 /* FALLTHROUGH */
230 case 'R':
231 if ('\0' == cp[1])
232 ok = 1;
233 break;
234 case 'B':
235 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
236 ok = 1;
237 break;
238 case 'C':
239 if ('W' == cp[1] && '\0' == cp[2])
240 ok = 1;
241 break;
242 default:
243 break;
244 }
245
246 if (0 == ok) {
247 mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
248 n->line, n->pos, "ft %s", cp);
249 *cp = '\0';
250 }
251
252 if (1 < n->nchild)
253 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
254 n->pos, "want one child (have %d)", n->nchild);
255
256 return(1);
257 }
258
259 static int
260 check_part(CHKARGS)
261 {
262
263 if (MAN_BODY == n->type && 0 == n->nchild)
264 mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line,
265 n->pos, "want children (have none)");
266
267 return(1);
268 }
269
270 static int
271 check_par(CHKARGS)
272 {
273
274 switch (n->type) {
275 case MAN_BLOCK:
276 if (0 == n->body->nchild)
277 man_node_delete(man, n);
278 break;
279 case MAN_BODY:
280 if (0 == n->nchild)
281 mandoc_vmsg(MANDOCERR_PAR_SKIP,
282 man->parse, n->line, n->pos,
283 "%s empty", man_macronames[n->tok]);
284 break;
285 case MAN_HEAD:
286 if (n->nchild)
287 mandoc_vmsg(MANDOCERR_ARG_SKIP,
288 man->parse, n->line, n->pos,
289 "%s %s%s", man_macronames[n->tok],
290 n->child->string,
291 n->nchild > 1 ? " ..." : "");
292 break;
293 default:
294 break;
295 }
296
297 return(1);
298 }
299
300 static int
301 post_IP(CHKARGS)
302 {
303
304 switch (n->type) {
305 case MAN_BLOCK:
306 if (0 == n->head->nchild && 0 == n->body->nchild)
307 man_node_delete(man, n);
308 break;
309 case MAN_BODY:
310 if (0 == n->parent->head->nchild && 0 == n->nchild)
311 mandoc_vmsg(MANDOCERR_PAR_SKIP,
312 man->parse, n->line, n->pos,
313 "%s empty", man_macronames[n->tok]);
314 break;
315 default:
316 break;
317 }
318 return(1);
319 }
320
321 static int
322 post_TH(CHKARGS)
323 {
324 struct man_node *nb;
325 const char *p;
326
327 check_ge2(man, n);
328 check_le5(man, n);
329
330 free(man->meta.title);
331 free(man->meta.vol);
332 free(man->meta.source);
333 free(man->meta.msec);
334 free(man->meta.date);
335
336 man->meta.title = man->meta.vol = man->meta.date =
337 man->meta.msec = man->meta.source = NULL;
338
339 nb = n;
340
341 /* ->TITLE<- MSEC DATE SOURCE VOL */
342
343 n = n->child;
344 if (n && n->string) {
345 for (p = n->string; '\0' != *p; p++) {
346 /* Only warn about this once... */
347 if (isalpha((unsigned char)*p) &&
348 ! isupper((unsigned char)*p)) {
349 mandoc_vmsg(MANDOCERR_TITLE_CASE,
350 man->parse, n->line,
351 n->pos + (p - n->string),
352 "TH %s", n->string);
353 break;
354 }
355 }
356 man->meta.title = mandoc_strdup(n->string);
357 } else
358 man->meta.title = mandoc_strdup("");
359
360 /* TITLE ->MSEC<- DATE SOURCE VOL */
361
362 if (n)
363 n = n->next;
364 if (n && n->string)
365 man->meta.msec = mandoc_strdup(n->string);
366 else
367 man->meta.msec = mandoc_strdup("");
368
369 /* TITLE MSEC ->DATE<- SOURCE VOL */
370
371 if (n)
372 n = n->next;
373 if (n && n->string && '\0' != n->string[0]) {
374 man->meta.date = man->quick ?
375 mandoc_strdup(n->string) :
376 mandoc_normdate(man->parse, n->string,
377 n->line, n->pos);
378 } else {
379 man->meta.date = mandoc_strdup("");
380 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
381 n ? n->line : nb->line,
382 n ? n->pos : nb->pos, "TH");
383 }
384
385 /* TITLE MSEC DATE ->SOURCE<- VOL */
386
387 if (n && (n = n->next))
388 man->meta.source = mandoc_strdup(n->string);
389
390 /* TITLE MSEC DATE SOURCE ->VOL<- */
391 /* If missing, use the default VOL name for MSEC. */
392
393 if (n && (n = n->next))
394 man->meta.vol = mandoc_strdup(n->string);
395 else if ('\0' != man->meta.msec[0] &&
396 (NULL != (p = mandoc_a2msec(man->meta.msec))))
397 man->meta.vol = mandoc_strdup(p);
398
399 /*
400 * Remove the `TH' node after we've processed it for our
401 * meta-data.
402 */
403 man_node_delete(man, man->last);
404 return(1);
405 }
406
407 static int
408 post_nf(CHKARGS)
409 {
410
411 check_eq0(man, n);
412
413 if (MAN_LITERAL & man->flags)
414 mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
415 n->line, n->pos, "nf");
416
417 man->flags |= MAN_LITERAL;
418 return(1);
419 }
420
421 static int
422 post_fi(CHKARGS)
423 {
424
425 check_eq0(man, n);
426
427 if ( ! (MAN_LITERAL & man->flags))
428 mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
429 n->line, n->pos, "fi");
430
431 man->flags &= ~MAN_LITERAL;
432 return(1);
433 }
434
435 static int
436 post_UC(CHKARGS)
437 {
438 static const char * const bsd_versions[] = {
439 "3rd Berkeley Distribution",
440 "4th Berkeley Distribution",
441 "4.2 Berkeley Distribution",
442 "4.3 Berkeley Distribution",
443 "4.4 Berkeley Distribution",
444 };
445
446 const char *p, *s;
447
448 n = n->child;
449
450 if (NULL == n || MAN_TEXT != n->type)
451 p = bsd_versions[0];
452 else {
453 s = n->string;
454 if (0 == strcmp(s, "3"))
455 p = bsd_versions[0];
456 else if (0 == strcmp(s, "4"))
457 p = bsd_versions[1];
458 else if (0 == strcmp(s, "5"))
459 p = bsd_versions[2];
460 else if (0 == strcmp(s, "6"))
461 p = bsd_versions[3];
462 else if (0 == strcmp(s, "7"))
463 p = bsd_versions[4];
464 else
465 p = bsd_versions[0];
466 }
467
468 free(man->meta.source);
469 man->meta.source = mandoc_strdup(p);
470 return(1);
471 }
472
473 static int
474 post_AT(CHKARGS)
475 {
476 static const char * const unix_versions[] = {
477 "7th Edition",
478 "System III",
479 "System V",
480 "System V Release 2",
481 };
482
483 const char *p, *s;
484 struct man_node *nn;
485
486 n = n->child;
487
488 if (NULL == n || MAN_TEXT != n->type)
489 p = unix_versions[0];
490 else {
491 s = n->string;
492 if (0 == strcmp(s, "3"))
493 p = unix_versions[0];
494 else if (0 == strcmp(s, "4"))
495 p = unix_versions[1];
496 else if (0 == strcmp(s, "5")) {
497 nn = n->next;
498 if (nn && MAN_TEXT == nn->type && nn->string[0])
499 p = unix_versions[3];
500 else
501 p = unix_versions[2];
502 } else
503 p = unix_versions[0];
504 }
505
506 free(man->meta.source);
507 man->meta.source = mandoc_strdup(p);
508 return(1);
509 }
510
511 static int
512 post_vs(CHKARGS)
513 {
514
515 if (n->tok == MAN_br)
516 check_eq0(man, n);
517 else
518 check_le1(man, n);
519
520 if (NULL != n->prev)
521 return(1);
522
523 switch (n->parent->tok) {
524 case MAN_SH:
525 /* FALLTHROUGH */
526 case MAN_SS:
527 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
528 "%s after %s", man_macronames[n->tok],
529 man_macronames[n->parent->tok]);
530 /* FALLTHROUGH */
531 case MAN_MAX:
532 /*
533 * Don't warn about this because it occurs in pod2man
534 * and would cause considerable (unfixable) warnage.
535 */
536 man_node_delete(man, n);
537 break;
538 default:
539 break;
540 }
541
542 return(1);
543 }