]> git.cameronkatri.com Git - mandoc.git/blob - man_validate.c
Continue parser unification:
[mandoc.git] / man_validate.c
1 /* $OpenBSD$ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdarg.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "man.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libman.h"
38
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Pointer to a validation handler taking the CHKARGS parameters. */
typedef	void	(*v_check)(CHKARGS);

/* Handlers named after the macro whose node they validate. */
static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_ft(CHKARGS);
static	void	  post_vs(CHKARGS);

/* Handlers shared by several macros or selected by node type. */
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);
56
57 static const v_check __man_valids[MAN_MAX - MAN_TH] = {
58 post_TH, /* TH */
59 NULL, /* SH */
60 NULL, /* SS */
61 NULL, /* TP */
62 check_par, /* LP */
63 check_par, /* PP */
64 check_par, /* P */
65 post_IP, /* IP */
66 NULL, /* HP */
67 NULL, /* SM */
68 NULL, /* SB */
69 NULL, /* BI */
70 NULL, /* IB */
71 NULL, /* BR */
72 NULL, /* RB */
73 NULL, /* R */
74 NULL, /* B */
75 NULL, /* I */
76 NULL, /* IR */
77 NULL, /* RI */
78 post_vs, /* br */
79 post_vs, /* sp */
80 NULL, /* nf */
81 NULL, /* fi */
82 NULL, /* RE */
83 check_part, /* RS */
84 NULL, /* DT */
85 post_UC, /* UC */
86 NULL, /* PD */
87 post_AT, /* AT */
88 NULL, /* in */
89 post_ft, /* ft */
90 post_OP, /* OP */
91 NULL, /* EX */
92 NULL, /* EE */
93 post_UR, /* UR */
94 NULL, /* UE */
95 NULL, /* ll */
96 };
97 static const v_check *man_valids = __man_valids - MAN_TH;
98
99
100 void
101 man_node_validate(struct roff_man *man)
102 {
103 struct roff_node *n;
104 const v_check *cp;
105
106 n = man->last;
107 man->last = man->last->child;
108 while (man->last != NULL) {
109 man_node_validate(man);
110 if (man->last == n)
111 man->last = man->last->child;
112 else
113 man->last = man->last->next;
114 }
115
116 man->last = n;
117 man->next = ROFF_NEXT_SIBLING;
118 switch (n->type) {
119 case ROFFT_TEXT:
120 check_text(man, n);
121 break;
122 case ROFFT_ROOT:
123 check_root(man, n);
124 break;
125 case ROFFT_EQN:
126 case ROFFT_TBL:
127 break;
128 default:
129 cp = man_valids + n->tok;
130 if (*cp)
131 (*cp)(man, n);
132 if (man->last == n)
133 man_state(man, n);
134 break;
135 }
136 }
137
138 static void
139 check_root(CHKARGS)
140 {
141
142 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
143
144 if (NULL == man->first->child)
145 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
146 n->line, n->pos, NULL);
147 else
148 man->meta.hasbody = 1;
149
150 if (NULL == man->meta.title) {
151 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
152 n->line, n->pos, NULL);
153
154 /*
155 * If a title hasn't been set, do so now (by
156 * implication, date and section also aren't set).
157 */
158
159 man->meta.title = mandoc_strdup("");
160 man->meta.msec = mandoc_strdup("");
161 man->meta.date = man->quick ? mandoc_strdup("") :
162 mandoc_normdate(man->parse, NULL, n->line, n->pos);
163 }
164 }
165
166 static void
167 check_text(CHKARGS)
168 {
169 char *cp, *p;
170
171 if (MAN_LITERAL & man->flags)
172 return;
173
174 cp = n->string;
175 for (p = cp; NULL != (p = strchr(p, '\t')); p++)
176 mandoc_msg(MANDOCERR_FI_TAB, man->parse,
177 n->line, n->pos + (p - cp), NULL);
178 }
179
180 static void
181 post_OP(CHKARGS)
182 {
183
184 if (n->child == NULL)
185 mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
186 n->line, n->pos, "OP");
187 else if (n->child->next != NULL && n->child->next->next != NULL) {
188 n = n->child->next->next;
189 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
190 n->line, n->pos, "OP ... %s", n->string);
191 }
192 }
193
194 static void
195 post_UR(CHKARGS)
196 {
197
198 if (n->type == ROFFT_HEAD && n->child == NULL)
199 mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse,
200 n->line, n->pos, "UR");
201 check_part(man, n);
202 }
203
204 static void
205 post_ft(CHKARGS)
206 {
207 char *cp;
208 int ok;
209
210 if (n->child == NULL)
211 return;
212
213 ok = 0;
214 cp = n->child->string;
215 switch (*cp) {
216 case '1':
217 case '2':
218 case '3':
219 case '4':
220 case 'I':
221 case 'P':
222 case 'R':
223 if ('\0' == cp[1])
224 ok = 1;
225 break;
226 case 'B':
227 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
228 ok = 1;
229 break;
230 case 'C':
231 if ('W' == cp[1] && '\0' == cp[2])
232 ok = 1;
233 break;
234 default:
235 break;
236 }
237
238 if (0 == ok) {
239 mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
240 n->line, n->pos, "ft %s", cp);
241 *cp = '\0';
242 }
243 }
244
245 static void
246 check_part(CHKARGS)
247 {
248
249 if (n->type == ROFFT_BODY && n->child == NULL)
250 mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
251 n->line, n->pos, roff_name[n->tok]);
252 }
253
254 static void
255 check_par(CHKARGS)
256 {
257
258 switch (n->type) {
259 case ROFFT_BLOCK:
260 if (n->body->child == NULL)
261 roff_node_delete(man, n);
262 break;
263 case ROFFT_BODY:
264 if (n->child == NULL)
265 mandoc_vmsg(MANDOCERR_PAR_SKIP,
266 man->parse, n->line, n->pos,
267 "%s empty", roff_name[n->tok]);
268 break;
269 case ROFFT_HEAD:
270 if (n->child != NULL)
271 mandoc_vmsg(MANDOCERR_ARG_SKIP,
272 man->parse, n->line, n->pos, "%s %s%s",
273 roff_name[n->tok], n->child->string,
274 n->child->next != NULL ? " ..." : "");
275 break;
276 default:
277 break;
278 }
279 }
280
281 static void
282 post_IP(CHKARGS)
283 {
284
285 switch (n->type) {
286 case ROFFT_BLOCK:
287 if (n->head->child == NULL && n->body->child == NULL)
288 roff_node_delete(man, n);
289 break;
290 case ROFFT_BODY:
291 if (n->parent->head->child == NULL && n->child == NULL)
292 mandoc_vmsg(MANDOCERR_PAR_SKIP,
293 man->parse, n->line, n->pos,
294 "%s empty", roff_name[n->tok]);
295 break;
296 default:
297 break;
298 }
299 }
300
301 static void
302 post_TH(CHKARGS)
303 {
304 struct roff_node *nb;
305 const char *p;
306
307 free(man->meta.title);
308 free(man->meta.vol);
309 free(man->meta.os);
310 free(man->meta.msec);
311 free(man->meta.date);
312
313 man->meta.title = man->meta.vol = man->meta.date =
314 man->meta.msec = man->meta.os = NULL;
315
316 nb = n;
317
318 /* ->TITLE<- MSEC DATE OS VOL */
319
320 n = n->child;
321 if (n && n->string) {
322 for (p = n->string; '\0' != *p; p++) {
323 /* Only warn about this once... */
324 if (isalpha((unsigned char)*p) &&
325 ! isupper((unsigned char)*p)) {
326 mandoc_vmsg(MANDOCERR_TITLE_CASE,
327 man->parse, n->line,
328 n->pos + (p - n->string),
329 "TH %s", n->string);
330 break;
331 }
332 }
333 man->meta.title = mandoc_strdup(n->string);
334 } else {
335 man->meta.title = mandoc_strdup("");
336 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
337 nb->line, nb->pos, "TH");
338 }
339
340 /* TITLE ->MSEC<- DATE OS VOL */
341
342 if (n)
343 n = n->next;
344 if (n && n->string)
345 man->meta.msec = mandoc_strdup(n->string);
346 else {
347 man->meta.msec = mandoc_strdup("");
348 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
349 nb->line, nb->pos, "TH %s", man->meta.title);
350 }
351
352 /* TITLE MSEC ->DATE<- OS VOL */
353
354 if (n)
355 n = n->next;
356 if (n && n->string && '\0' != n->string[0]) {
357 man->meta.date = man->quick ?
358 mandoc_strdup(n->string) :
359 mandoc_normdate(man->parse, n->string,
360 n->line, n->pos);
361 } else {
362 man->meta.date = mandoc_strdup("");
363 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
364 n ? n->line : nb->line,
365 n ? n->pos : nb->pos, "TH");
366 }
367
368 /* TITLE MSEC DATE ->OS<- VOL */
369
370 if (n && (n = n->next))
371 man->meta.os = mandoc_strdup(n->string);
372 else if (man->defos != NULL)
373 man->meta.os = mandoc_strdup(man->defos);
374
375 /* TITLE MSEC DATE OS ->VOL<- */
376 /* If missing, use the default VOL name for MSEC. */
377
378 if (n && (n = n->next))
379 man->meta.vol = mandoc_strdup(n->string);
380 else if ('\0' != man->meta.msec[0] &&
381 (NULL != (p = mandoc_a2msec(man->meta.msec))))
382 man->meta.vol = mandoc_strdup(p);
383
384 if (n != NULL && (n = n->next) != NULL)
385 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
386 n->line, n->pos, "TH ... %s", n->string);
387
388 /*
389 * Remove the `TH' node after we've processed it for our
390 * meta-data.
391 */
392 roff_node_delete(man, man->last);
393 }
394
395 static void
396 post_UC(CHKARGS)
397 {
398 static const char * const bsd_versions[] = {
399 "3rd Berkeley Distribution",
400 "4th Berkeley Distribution",
401 "4.2 Berkeley Distribution",
402 "4.3 Berkeley Distribution",
403 "4.4 Berkeley Distribution",
404 };
405
406 const char *p, *s;
407
408 n = n->child;
409
410 if (n == NULL || n->type != ROFFT_TEXT)
411 p = bsd_versions[0];
412 else {
413 s = n->string;
414 if (0 == strcmp(s, "3"))
415 p = bsd_versions[0];
416 else if (0 == strcmp(s, "4"))
417 p = bsd_versions[1];
418 else if (0 == strcmp(s, "5"))
419 p = bsd_versions[2];
420 else if (0 == strcmp(s, "6"))
421 p = bsd_versions[3];
422 else if (0 == strcmp(s, "7"))
423 p = bsd_versions[4];
424 else
425 p = bsd_versions[0];
426 }
427
428 free(man->meta.os);
429 man->meta.os = mandoc_strdup(p);
430 }
431
432 static void
433 post_AT(CHKARGS)
434 {
435 static const char * const unix_versions[] = {
436 "7th Edition",
437 "System III",
438 "System V",
439 "System V Release 2",
440 };
441
442 struct roff_node *nn;
443 const char *p, *s;
444
445 n = n->child;
446
447 if (n == NULL || n->type != ROFFT_TEXT)
448 p = unix_versions[0];
449 else {
450 s = n->string;
451 if (0 == strcmp(s, "3"))
452 p = unix_versions[0];
453 else if (0 == strcmp(s, "4"))
454 p = unix_versions[1];
455 else if (0 == strcmp(s, "5")) {
456 nn = n->next;
457 if (nn != NULL &&
458 nn->type == ROFFT_TEXT &&
459 nn->string[0] != '\0')
460 p = unix_versions[3];
461 else
462 p = unix_versions[2];
463 } else
464 p = unix_versions[0];
465 }
466
467 free(man->meta.os);
468 man->meta.os = mandoc_strdup(p);
469 }
470
471 static void
472 post_vs(CHKARGS)
473 {
474
475 if (NULL != n->prev)
476 return;
477
478 switch (n->parent->tok) {
479 case MAN_SH:
480 case MAN_SS:
481 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
482 "%s after %s", roff_name[n->tok],
483 roff_name[n->parent->tok]);
484 /* FALLTHROUGH */
485 case TOKEN_NONE:
486 /*
487 * Don't warn about this because it occurs in pod2man
488 * and would cause considerable (unfixable) warnage.
489 */
490 roff_node_delete(man, n);
491 break;
492 default:
493 break;
494 }
495 }