]> git.cameronkatri.com Git - mandoc.git/blob - roff.h
Parser unification: use nice ohashes for all three request and macro tables;
[mandoc.git] / roff.h
1 /* $Id: roff.h,v 1.42 2017/04/29 12:45:42 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
/*
 * Forward declarations of types defined elsewhere.
 * This header only ever uses pointers to them,
 * so the complete definitions are not needed here.
 */
struct ohash;
struct mdoc_arg;
union mdoc_data;
22
/*
 * Kind of high-level macro language used by a document
 * (stored in struct roff_man, member macroset).
 */
enum roff_macroset {
	MACROSET_NONE = 0,	/* No macro set selected. */
	MACROSET_MDOC,		/* The mdoc macro set. */
	MACROSET_MAN		/* The man macro set. */
};
28
/*
 * Identifiers for manual page sections.
 * Used for the "sec" member of struct roff_node and for the
 * "lastsec" and "lastnamed" members of struct roff_man.
 * The numeric order follows the order of declaration, starting at 0.
 */
enum roff_sec {
	SEC_NONE = 0,
	SEC_NAME,
	SEC_LIBRARY,
	SEC_SYNOPSIS,
	SEC_DESCRIPTION,
	SEC_CONTEXT,
	SEC_IMPLEMENTATION,	/* IMPLEMENTATION NOTES */
	SEC_RETURN_VALUES,
	SEC_ENVIRONMENT,
	SEC_FILES,
	SEC_EXIT_STATUS,
	SEC_EXAMPLES,
	SEC_DIAGNOSTICS,
	SEC_COMPATIBILITY,
	SEC_ERRORS,
	SEC_SEE_ALSO,
	SEC_STANDARDS,
	SEC_HISTORY,
	SEC_AUTHORS,
	SEC_CAVEATS,
	SEC_BUGS,
	SEC_SECURITY,
	SEC_CUSTOM,		/* Presumably: any non-standard section. */
	SEC__MAX		/* Number of identifiers above; keep last. */
};
55
/*
 * Type of a syntax tree node (struct roff_node, member type).
 * Values are implicit and start at 0 in declaration order.
 */
enum roff_type {
	ROFFT_ROOT,
	ROFFT_BLOCK,
	ROFFT_HEAD,
	ROFFT_BODY,
	ROFFT_TAIL,
	ROFFT_ELEM,
	ROFFT_TEXT,	/* Node carrying a text string. */
	ROFFT_TBL,	/* Node carrying a table span. */
	ROFFT_EQN	/* Node carrying an equation. */
};
67
/*
 * Request and macro identifiers (struct roff_node, member tok).
 *
 * The enumeration consists of three contiguous ranges:
 *   ROFF_ab  .. ROFF_USERDEF	roff requests,
 *   MDOC_Dd  .. MDOC_MAX	mdoc macros,
 *   MAN_TH   .. MAN_MAX	man macros,
 * with TOKEN_NONE sitting between the roff and the mdoc range.
 * The order of the enumerators is significant: do not reorder them.
 * NOTE(review): presumably roffhash_alloc() takes the first and last
 * token of one such range - confirm against roff.c.
 *
 * The comments inside the roff range name requests that have no
 * ROFF_* identifier of their own but appear as MAN_* and/or MDOC_*
 * tokens further down.
 */
enum roff_tok {
	/* --- roff requests --- */
	ROFF_ab = 0,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	TOKEN_NONE,

	/* --- mdoc macros --- */
	MDOC_Dd,
	MDOC_Dt,
	MDOC_Os,
	MDOC_Sh,
	MDOC_Ss,
	MDOC_Pp,
	MDOC_D1,
	MDOC_Dl,
	MDOC_Bd,
	MDOC_Ed,
	MDOC_Bl,
	MDOC_El,
	MDOC_It,
	MDOC_Ad,
	MDOC_An,
	MDOC_Ap,
	MDOC_Ar,
	MDOC_Cd,
	MDOC_Cm,
	MDOC_Dv,
	MDOC_Er,
	MDOC_Ev,
	MDOC_Ex,
	MDOC_Fa,
	MDOC_Fd,
	MDOC_Fl,
	MDOC_Fn,
	MDOC_Ft,
	MDOC_Ic,
	MDOC_In,
	MDOC_Li,
	MDOC_Nd,
	MDOC_Nm,
	MDOC_Op,
	MDOC_Ot,
	MDOC_Pa,
	MDOC_Rv,
	MDOC_St,
	MDOC_Va,
	MDOC_Vt,
	MDOC_Xr,
	MDOC__A,
	MDOC__B,
	MDOC__D,
	MDOC__I,
	MDOC__J,
	MDOC__N,
	MDOC__O,
	MDOC__P,
	MDOC__R,
	MDOC__T,
	MDOC__V,
	MDOC_Ac,
	MDOC_Ao,
	MDOC_Aq,
	MDOC_At,
	MDOC_Bc,
	MDOC_Bf,
	MDOC_Bo,
	MDOC_Bq,
	MDOC_Bsx,
	MDOC_Bx,
	MDOC_Db,
	MDOC_Dc,
	MDOC_Do,
	MDOC_Dq,
	MDOC_Ec,
	MDOC_Ef,
	MDOC_Em,
	MDOC_Eo,
	MDOC_Fx,
	MDOC_Ms,
	MDOC_No,
	MDOC_Ns,
	MDOC_Nx,
	MDOC_Ox,
	MDOC_Pc,
	MDOC_Pf,
	MDOC_Po,
	MDOC_Pq,
	MDOC_Qc,
	MDOC_Ql,
	MDOC_Qo,
	MDOC_Qq,
	MDOC_Re,
	MDOC_Rs,
	MDOC_Sc,
	MDOC_So,
	MDOC_Sq,
	MDOC_Sm,
	MDOC_Sx,
	MDOC_Sy,
	MDOC_Tn,
	MDOC_Ux,
	MDOC_Xc,
	MDOC_Xo,
	MDOC_Fo,
	MDOC_Fc,
	MDOC_Oo,
	MDOC_Oc,
	MDOC_Bk,
	MDOC_Ek,
	MDOC_Bt,
	MDOC_Hf,
	MDOC_Fr,
	MDOC_Ud,
	MDOC_Lb,
	MDOC_Lp,
	MDOC_Lk,
	MDOC_Mt,
	MDOC_Brq,
	MDOC_Bro,
	MDOC_Brc,
	MDOC__C,
	MDOC_Es,
	MDOC_En,
	MDOC_Dx,
	MDOC__Q,
	MDOC_br,
	MDOC_sp,
	MDOC__U,
	MDOC_Ta,
	MDOC_ll,
	MDOC_MAX,

	/* --- man macros --- */
	MAN_TH,
	MAN_SH,
	MAN_SS,
	MAN_TP,
	MAN_LP,
	MAN_PP,
	MAN_P,
	MAN_IP,
	MAN_HP,
	MAN_SM,
	MAN_SB,
	MAN_BI,
	MAN_IB,
	MAN_BR,
	MAN_RB,
	MAN_R,
	MAN_B,
	MAN_I,
	MAN_IR,
	MAN_RI,
	MAN_br,
	MAN_sp,
	MAN_nf,
	MAN_fi,
	MAN_RE,
	MAN_RS,
	MAN_DT,
	MAN_UC,
	MAN_PD,
	MAN_AT,
	MAN_in,
	MAN_ft,
	MAN_OP,
	MAN_EX,
	MAN_EE,
	MAN_UR,
	MAN_UE,
	MAN_ll,
	MAN_MAX
};
477
/*
 * Where to attach the next node that gets parsed
 * (struct roff_man, member next).
 */
enum roff_next {
	ROFF_NEXT_SIBLING = 0,	/* As a sibling. */
	ROFF_NEXT_CHILD		/* As a child. */
};
482
/*
 * Indicates that a BODY's formatting has ended, but
 * the scope is still open.  Used for badly nested blocks.
 * Stored in the "end" member of struct roff_node.
 */
enum mdoc_endbody {
	ENDBODY_NOT = 0,	/* Not an end-of-body marker. */
	ENDBODY_SPACE		/* Is broken: append a space. */
};
491
492 struct roff_node {
493 struct roff_node *parent; /* Parent AST node. */
494 struct roff_node *child; /* First child AST node. */
495 struct roff_node *last; /* Last child AST node. */
496 struct roff_node *next; /* Sibling AST node. */
497 struct roff_node *prev; /* Prior sibling AST node. */
498 struct roff_node *head; /* BLOCK */
499 struct roff_node *body; /* BLOCK/ENDBODY */
500 struct roff_node *tail; /* BLOCK */
501 struct mdoc_arg *args; /* BLOCK/ELEM */
502 union mdoc_data *norm; /* Normalized arguments. */
503 char *string; /* TEXT */
504 const struct tbl_span *span; /* TBL */
505 const struct eqn *eqn; /* EQN */
506 int line; /* Input file line number. */
507 int pos; /* Input file column number. */
508 int flags;
509 #define NODE_VALID (1 << 0) /* Has been validated. */
510 #define NODE_ENDED (1 << 1) /* Gone past body end mark. */
511 #define NODE_EOS (1 << 2) /* At sentence boundary. */
512 #define NODE_LINE (1 << 3) /* First macro/text on line. */
513 #define NODE_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting. */
514 #define NODE_BROKEN (1 << 5) /* Must validate parent when ending. */
515 #define NODE_DELIMO (1 << 6)
516 #define NODE_DELIMC (1 << 7)
517 #define NODE_NOSRC (1 << 8) /* Generated node, not in input file. */
518 #define NODE_NOPRT (1 << 9) /* Shall not print anything. */
519 int prev_font; /* Before entering this node. */
520 int aux; /* Decoded node data, type-dependent. */
521 enum roff_tok tok; /* Request or macro ID. */
522 enum roff_type type; /* AST node type. */
523 enum roff_sec sec; /* Current named section. */
524 enum mdoc_endbody end; /* BODY */
525 };
526
/*
 * Meta-data describing one manual page document.
 * Embedded by value in struct roff_man as member "meta".
 */
struct roff_meta {
	char		 *msec;    /* Manual section, usually a digit. */
	char		 *vol;     /* Manual volume title. */
	char		 *os;      /* Operating system. */
	char		 *arch;    /* Machine architecture. */
	char		 *title;   /* Manual title, usually CAPS. */
	char		 *name;    /* Leading manual name. */
	char		 *date;    /* Normalized date. */
	int		  hasbody; /* Document is not empty. */
};
537
538 struct roff_man {
539 struct roff_meta meta; /* Document meta-data. */
540 struct mparse *parse; /* Parse pointer. */
541 struct roff *roff; /* Roff parser state data. */
542 struct ohash *mdocmac; /* Mdoc macro lookup table. */
543 struct ohash *manmac; /* Man macro lookup table. */
544 const char *defos; /* Default operating system. */
545 struct roff_node *first; /* The first node parsed. */
546 struct roff_node *last; /* The last node parsed. */
547 struct roff_node *last_es; /* The most recent Es node. */
548 int quick; /* Abort parse early. */
549 int flags; /* Parse flags. */
550 #define MDOC_LITERAL (1 << 1) /* In a literal scope. */
551 #define MDOC_PBODY (1 << 2) /* In the document body. */
552 #define MDOC_NEWLINE (1 << 3) /* First macro/text in a line. */
553 #define MDOC_PHRASE (1 << 4) /* In a Bl -column phrase. */
554 #define MDOC_PHRASELIT (1 << 5) /* Literal within a phrase. */
555 #define MDOC_FREECOL (1 << 6) /* `It' invocation should close. */
556 #define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting. */
557 #define MDOC_KEEP (1 << 8) /* In a word keep. */
558 #define MDOC_SMOFF (1 << 9) /* Spacing is off. */
559 #define MDOC_NODELIMC (1 << 10) /* Disable closing delimiter handling. */
560 #define MAN_ELINE (1 << 11) /* Next-line element scope. */
561 #define MAN_BLINE (1 << 12) /* Next-line block scope. */
562 #define MDOC_PHRASEQF (1 << 13) /* Quote first word encountered. */
563 #define MDOC_PHRASEQL (1 << 14) /* Quote last word of this phrase. */
564 #define MDOC_PHRASEQN (1 << 15) /* Quote first word of the next phrase. */
565 #define MAN_LITERAL MDOC_LITERAL
566 #define MAN_NEWLINE MDOC_NEWLINE
567 enum roff_macroset macroset; /* Kind of high-level macros used. */
568 enum roff_sec lastsec; /* Last section seen. */
569 enum roff_sec lastnamed; /* Last standard section seen. */
570 enum roff_next next; /* Where to put the next node. */
571 };
572
/*
 * Pointer to an array of constant strings; defined outside this header.
 * NOTE(review): presumably the request and macro names, indexed by
 * enum roff_tok - confirm against the definition.
 */
extern	const char *const *roff_name;
574
575
/*
 * Functions declared here and implemented elsewhere.
 *
 * deroff(): takes a pointer to a string pointer and a read-only
 *   syntax tree node.  NOTE(review): presumably stores the plain
 *   text content of the node in the string - confirm.
 * roffhash_alloc(): returns a hash table for a pair of tokens.
 *   NOTE(review): presumably the first and last token of the range
 *   to be entered into the table - confirm.
 * roffhash_find(): looks up a name, given as a string and a size,
 *   in a hash table and returns a token.  NOTE(review): the result
 *   for an unknown name is presumably TOKEN_NONE - confirm.
 * roffhash_free(): releases a hash table.  NOTE(review): presumably
 *   one obtained from roffhash_alloc() - confirm.
 *
 * size_t must be declared by the including file before this header.
 */
void		 deroff(char **, const struct roff_node *);
struct ohash	*roffhash_alloc(enum roff_tok, enum roff_tok);
enum roff_tok	 roffhash_find(struct ohash *, const char *, size_t);
void		 roffhash_free(struct ohash *);