]>
git.cameronkatri.com Git - mandoc.git/blob - roff.c
1 /* $Id: roff.c,v 1.201 2014/03/23 11:25:26 schwarze Exp $ */
3 * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #include "mandoc_aux.h"
31 #include "libmandoc.h"
33 /* Maximum number of nested if-else conditionals. */
34 #define RSTACK_MAX 128
36 /* Maximum number of string expansions per line, to break infinite loops. */
37 #define EXPAND_LIMIT 1000
81 * An incredibly-simple string buffer.
84 char *p
; /* nil-terminated buffer */
85 size_t sz
; /* saved strlen(p) */
89 * A key-value roffstr pair as part of a singly-linked list.
94 struct roffkv
*next
; /* next in list */
98 * A single number register as part of a singly-linked list.
103 struct roffreg
*next
;
107 struct mparse
*parse
; /* parse point */
108 int options
; /* parse options */
109 struct roffnode
*last
; /* leaf of stack */
110 int rstack
[RSTACK_MAX
]; /* stack of !`ie' rules */
111 char control
; /* control character */
112 int rstackpos
; /* position in rstack */
113 struct roffreg
*regtab
; /* number registers */
114 struct roffkv
*strtab
; /* user-defined strings & macros */
115 struct roffkv
*xmbtab
; /* multi-byte trans table (`tr') */
116 struct roffstr
*xtab
; /* single-byte trans table (`tr') */
117 const char *current_string
; /* value of last called user macro */
118 struct tbl_node
*first_tbl
; /* first table parsed */
119 struct tbl_node
*last_tbl
; /* last table parsed */
120 struct tbl_node
*tbl
; /* current table being parsed */
121 struct eqn_node
*last_eqn
; /* last equation parsed */
122 struct eqn_node
*first_eqn
; /* first equation parsed */
123 struct eqn_node
*eqn
; /* current equation being parsed */
127 enum rofft tok
; /* type of node */
128 struct roffnode
*parent
; /* up one in stack */
129 int line
; /* parse line */
130 int col
; /* parse col */
131 char *name
; /* node name, e.g. macro name */
132 char *end
; /* end-rules: custom token */
133 int endspan
; /* end-rules: next-line or infty */
134 int rule
; /* current evaluation rule */
137 #define ROFF_ARGS struct roff *r, /* parse ctx */ \
138 enum rofft tok, /* tok of macro */ \
139 char **bufp, /* input buffer */ \
140 size_t *szp, /* size of input buffer */ \
141 int ln, /* parse line */ \
142 int ppos, /* original pos in buffer */ \
143 int pos, /* current pos in buffer */ \
144 int *offs /* reset offset of buffer data */
146 typedef enum rofferr (*roffproc
)(ROFF_ARGS
);
149 const char *name
; /* macro name */
150 roffproc proc
; /* process new macro */
151 roffproc text
; /* process as child text of macro */
152 roffproc sub
; /* process as child of macro */
154 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */
155 struct roffmac
*next
;
159 const char *name
; /* predefined input name */
160 const char *str
; /* replacement symbol */
163 #define PREDEF(__name, __str) \
164 { (__name), (__str) },
166 static enum rofft
roffhash_find(const char *, size_t);
167 static void roffhash_init(void);
168 static void roffnode_cleanscope(struct roff
*);
169 static void roffnode_pop(struct roff
*);
170 static void roffnode_push(struct roff
*, enum rofft
,
171 const char *, int, int);
172 static enum rofferr
roff_block(ROFF_ARGS
);
173 static enum rofferr
roff_block_text(ROFF_ARGS
);
174 static enum rofferr
roff_block_sub(ROFF_ARGS
);
175 static enum rofferr
roff_cblock(ROFF_ARGS
);
176 static enum rofferr
roff_cc(ROFF_ARGS
);
177 static void roff_ccond(struct roff
*, int, int);
178 static enum rofferr
roff_cond(ROFF_ARGS
);
179 static enum rofferr
roff_cond_text(ROFF_ARGS
);
180 static enum rofferr
roff_cond_sub(ROFF_ARGS
);
181 static enum rofferr
roff_ds(ROFF_ARGS
);
182 static int roff_evalcond(const char *, int *);
183 static int roff_evalstrcond(const char *, int *);
184 static void roff_free1(struct roff
*);
185 static void roff_freereg(struct roffreg
*);
186 static void roff_freestr(struct roffkv
*);
187 static char *roff_getname(struct roff
*, char **, int, int);
188 static int roff_getnum(const char *, int *, int *);
189 static int roff_getop(const char *, int *, char *);
190 static int roff_getregn(const struct roff
*,
191 const char *, size_t);
192 static int roff_getregro(const char *name
);
193 static const char *roff_getstrn(const struct roff
*,
194 const char *, size_t);
195 static enum rofferr
roff_it(ROFF_ARGS
);
196 static enum rofferr
roff_line_ignore(ROFF_ARGS
);
197 static enum rofferr
roff_nr(ROFF_ARGS
);
198 static void roff_openeqn(struct roff
*, const char *,
199 int, int, const char *);
200 static enum rofft
roff_parse(struct roff
*, const char *, int *);
201 static enum rofferr
roff_parsetext(char **, size_t *, int, int *);
202 static enum rofferr
roff_res(struct roff
*,
203 char **, size_t *, int, int);
204 static enum rofferr
roff_rm(ROFF_ARGS
);
205 static void roff_setstr(struct roff
*,
206 const char *, const char *, int);
207 static void roff_setstrn(struct roffkv
**, const char *,
208 size_t, const char *, size_t, int);
209 static enum rofferr
roff_so(ROFF_ARGS
);
210 static enum rofferr
roff_tr(ROFF_ARGS
);
211 static enum rofferr
roff_Dd(ROFF_ARGS
);
212 static enum rofferr
roff_TH(ROFF_ARGS
);
213 static enum rofferr
roff_TE(ROFF_ARGS
);
214 static enum rofferr
roff_TS(ROFF_ARGS
);
215 static enum rofferr
roff_EQ(ROFF_ARGS
);
216 static enum rofferr
roff_EN(ROFF_ARGS
);
217 static enum rofferr
roff_T_(ROFF_ARGS
);
218 static enum rofferr
roff_userdef(ROFF_ARGS
);
220 /* See roffhash_find() */
224 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1)
226 static struct roffmac
*hash
[HASHWIDTH
];
228 static struct roffmac roffs
[ROFF_MAX
] = {
229 { "ad", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
230 { "am", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
231 { "ami", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
232 { "am1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
233 { "as", roff_ds
, NULL
, NULL
, 0, NULL
},
234 { "cc", roff_cc
, NULL
, NULL
, 0, NULL
},
235 { "ce", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
236 { "de", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
237 { "dei", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
238 { "de1", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
239 { "ds", roff_ds
, NULL
, NULL
, 0, NULL
},
240 { "el", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
241 { "fam", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
242 { "hw", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
243 { "hy", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
244 { "ie", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
245 { "if", roff_cond
, roff_cond_text
, roff_cond_sub
, ROFFMAC_STRUCT
, NULL
},
246 { "ig", roff_block
, roff_block_text
, roff_block_sub
, 0, NULL
},
247 { "it", roff_it
, NULL
, NULL
, 0, NULL
},
248 { "ne", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
249 { "nh", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
250 { "nr", roff_nr
, NULL
, NULL
, 0, NULL
},
251 { "ns", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
252 { "ps", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
253 { "rm", roff_rm
, NULL
, NULL
, 0, NULL
},
254 { "so", roff_so
, NULL
, NULL
, 0, NULL
},
255 { "ta", roff_line_ignore
, NULL
, NULL
, 0, NULL
},
256 { "tr", roff_tr
, NULL
, NULL
, 0, NULL
},
257 { "Dd", roff_Dd
, NULL
, NULL
, 0, NULL
},
258 { "TH", roff_TH
, NULL
, NULL
, 0, NULL
},
259 { "TS", roff_TS
, NULL
, NULL
, 0, NULL
},
260 { "TE", roff_TE
, NULL
, NULL
, 0, NULL
},
261 { "T&", roff_T_
, NULL
, NULL
, 0, NULL
},
262 { "EQ", roff_EQ
, NULL
, NULL
, 0, NULL
},
263 { "EN", roff_EN
, NULL
, NULL
, 0, NULL
},
264 { ".", roff_cblock
, NULL
, NULL
, 0, NULL
},
265 { NULL
, roff_userdef
, NULL
, NULL
, 0, NULL
},
268 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
269 const char *const __mdoc_reserved
[] = {
270 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
271 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
272 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
273 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
274 "Dt", "Dv", "Dx", "D1",
275 "Ec", "Ed", "Ef", "Ek", "El", "Em",
276 "En", "Eo", "Er", "Es", "Ev", "Ex",
277 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
278 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
279 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
280 "Oc", "Oo", "Op", "Os", "Ot", "Ox",
281 "Pa", "Pc", "Pf", "Po", "Pp", "Pq",
282 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
283 "Sc", "Sh", "Sm", "So", "Sq",
284 "Ss", "St", "Sx", "Sy",
285 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
286 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
287 "%P", "%Q", "%R", "%T", "%U", "%V",
291 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */
292 const char *const __man_reserved
[] = {
293 "AT", "B", "BI", "BR", "DT",
294 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
295 "LP", "OP", "P", "PD", "PP",
296 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
297 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
301 /* Array of injected predefined strings. */
302 #define PREDEFS_MAX 38
303 static const struct predef predefs
[PREDEFS_MAX
] = {
304 #include "predefs.in"
307 /* See roffhash_find() */
308 #define ROFF_HASH(p) (p[0] - ASCII_LO)
310 static int roffit_lines
; /* number of lines to delay */
311 static char *roffit_macro
; /* nil-terminated macro line */
319 for (i
= 0; i
< (int)ROFF_USERDEF
; i
++) {
320 assert(roffs
[i
].name
[0] >= ASCII_LO
);
321 assert(roffs
[i
].name
[0] <= ASCII_HI
);
323 buc
= ROFF_HASH(roffs
[i
].name
);
325 if (NULL
!= (n
= hash
[buc
])) {
326 for ( ; n
->next
; n
= n
->next
)
330 hash
[buc
] = &roffs
[i
];
335 * Look up a roff token by its name. Returns ROFF_MAX if no macro
336 * matches the name given by the first s bytes of p.
339 roffhash_find(const char *p
, size_t s
)
345 * libroff has an extremely simple hashtable, for the time
346 * being, which simply keys on the first character, which must
347 * be printable, then walks a chain. It works well enough until
351 if (p
[0] < ASCII_LO
|| p
[0] > ASCII_HI
)
356 if (NULL
== (n
= hash
[buc
]))
358 for ( ; n
; n
= n
->next
)
359 if (0 == strncmp(n
->name
, p
, s
) && '\0' == n
->name
[(int)s
])
360 return((enum rofft
)(n
- roffs
));
367 * Pop the current node off of the stack of roff instructions currently
371 roffnode_pop(struct roff
*r
)
378 r
->last
= r
->last
->parent
;
386 * Push a roff node onto the instruction stack. This must later be
387 * removed with roffnode_pop().
390 roffnode_push(struct roff
*r
, enum rofft tok
, const char *name
,
395 p
= mandoc_calloc(1, sizeof(struct roffnode
));
398 p
->name
= mandoc_strdup(name
);
402 p
->rule
= p
->parent
? p
->parent
->rule
: 0;
409 roff_free1(struct roff
*r
)
411 struct tbl_node
*tbl
;
415 while (NULL
!= (tbl
= r
->first_tbl
)) {
416 r
->first_tbl
= tbl
->next
;
420 r
->first_tbl
= r
->last_tbl
= r
->tbl
= NULL
;
422 while (NULL
!= (e
= r
->first_eqn
)) {
423 r
->first_eqn
= e
->next
;
427 r
->first_eqn
= r
->last_eqn
= r
->eqn
= NULL
;
432 roff_freestr(r
->strtab
);
433 roff_freestr(r
->xmbtab
);
435 r
->strtab
= r
->xmbtab
= NULL
;
437 roff_freereg(r
->regtab
);
442 for (i
= 0; i
< 128; i
++)
450 roff_reset(struct roff
*r
)
459 roff_free(struct roff
*r
)
468 roff_alloc(struct mparse
*parse
, int options
)
472 r
= mandoc_calloc(1, sizeof(struct roff
));
474 r
->options
= options
;
483 * In the current line, expand user-defined strings ("\*")
484 * and references to number registers ("\n").
485 * Also check the syntax of other escape sequences.
488 roff_res(struct roff
*r
, char **bufp
, size_t *szp
, int ln
, int pos
)
490 char ubuf
[12]; /* buffer to print the number */
491 const char *stesc
; /* start of an escape sequence ('\\') */
492 const char *stnam
; /* start of the name, after "[(*" */
493 const char *cp
; /* end of the name, e.g. before ']' */
494 const char *res
; /* the string to be substituted */
495 char *nbuf
; /* new buffer to copy bufp to */
496 size_t nsz
; /* size of the new buffer */
497 size_t maxl
; /* expected length of the escape name */
498 size_t naml
; /* actual length of the escape name */
499 int expand_count
; /* to avoid infinite loops */
505 while (NULL
!= (cp
= strchr(cp
, '\\'))) {
509 * The second character must be an asterisk or an n.
510 * If it isn't, skip it anyway: It is escaped,
511 * so it can't start another escape sequence.
525 if (ESCAPE_ERROR
!= mandoc_escape(&cp
, NULL
, NULL
))
528 (MANDOCERR_BADESCAPE
, r
->parse
,
529 ln
, (int)(stesc
- *bufp
), NULL
);
536 * The third character decides the length
537 * of the name of the string or register.
538 * Save a pointer to the name.
558 /* Advance to the end of the name. */
560 for (naml
= 0; 0 == maxl
|| naml
< maxl
; naml
++, cp
++) {
563 (MANDOCERR_BADESCAPE
,
565 (int)(stesc
- *bufp
), NULL
);
568 if (0 == maxl
&& ']' == *cp
)
573 * Retrieve the replacement string; if it is
574 * undefined, resume searching for escapes.
578 res
= roff_getstrn(r
, stnam
, naml
);
580 snprintf(ubuf
, sizeof(ubuf
), "%d",
581 roff_getregn(r
, stnam
, naml
));
585 (MANDOCERR_BADESCAPE
, r
->parse
,
586 ln
, (int)(stesc
- *bufp
), NULL
);
590 /* Replace the escape sequence by the string. */
594 nsz
= *szp
+ strlen(res
) + 1;
595 nbuf
= mandoc_malloc(nsz
);
597 strlcpy(nbuf
, *bufp
, (size_t)(stesc
- *bufp
+ 1));
598 strlcat(nbuf
, res
, nsz
);
599 strlcat(nbuf
, cp
+ (maxl
? 0 : 1), nsz
);
606 if (EXPAND_LIMIT
>= ++expand_count
)
609 /* Just leave the string unexpanded. */
610 mandoc_msg(MANDOCERR_ROFFLOOP
, r
->parse
, ln
, pos
, NULL
);
617 * Process text streams:
618 * Convert all breakable hyphens into ASCII_HYPH.
619 * Decrement and spring input line trap.
622 roff_parsetext(char **bufp
, size_t *szp
, int pos
, int *offs
)
630 start
= p
= *bufp
+ pos
;
633 sz
= strcspn(p
, "-\\");
640 /* Skip over escapes. */
642 esc
= mandoc_escape((const char **)&p
, NULL
, NULL
);
643 if (ESCAPE_ERROR
== esc
)
646 } else if (p
== start
) {
651 if (isalpha((unsigned char)p
[-1]) &&
652 isalpha((unsigned char)p
[1]))
657 /* Spring the input line trap. */
658 if (1 == roffit_lines
) {
659 isz
= asprintf(&p
, "%s\n.%s", *bufp
, roffit_macro
);
662 exit((int)MANDOCLEVEL_SYSERR
);
670 return(ROFF_REPARSE
);
671 } else if (1 < roffit_lines
)
677 roff_parseln(struct roff
*r
, int ln
, char **bufp
,
678 size_t *szp
, int pos
, int *offs
)
685 * Run the reserved-word filter only if we have some reserved
689 e
= roff_res(r
, bufp
, szp
, ln
, pos
);
692 assert(ROFF_CONT
== e
);
695 ctl
= roff_getcontrol(r
, *bufp
, &pos
);
698 * First, if a scope is open and we're not a macro, pass the
699 * text through the macro's filter. If a scope isn't open and
700 * we're not a macro, just let it through.
701 * Finally, if there's an equation scope open, divert it into it
702 * no matter our state.
705 if (r
->last
&& ! ctl
) {
707 assert(roffs
[t
].text
);
709 (r
, t
, bufp
, szp
, ln
, pos
, pos
, offs
);
710 assert(ROFF_IGN
== e
|| ROFF_CONT
== e
);
715 return(eqn_read(&r
->eqn
, ln
, *bufp
, ppos
, offs
));
718 return(tbl_read(r
->tbl
, ln
, *bufp
, pos
));
719 return(roff_parsetext(bufp
, szp
, pos
, offs
));
723 * If a scope is open, go to the child handler for that macro,
724 * as it may want to preprocess before doing anything with it.
725 * Don't do so if an equation is open.
730 assert(roffs
[t
].sub
);
731 return((*roffs
[t
].sub
)
733 ln
, ppos
, pos
, offs
));
737 * Lastly, as we've no scope open, try to look up and execute
738 * the new macro. If no macro is found, simply return and let
739 * the compilers handle it.
742 if (ROFF_MAX
== (t
= roff_parse(r
, *bufp
, &pos
)))
745 assert(roffs
[t
].proc
);
746 return((*roffs
[t
].proc
)
748 ln
, ppos
, pos
, offs
));
753 roff_endparse(struct roff
*r
)
757 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
758 r
->last
->line
, r
->last
->col
, NULL
);
761 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
762 r
->eqn
->eqn
.ln
, r
->eqn
->eqn
.pos
, NULL
);
767 mandoc_msg(MANDOCERR_SCOPEEXIT
, r
->parse
,
768 r
->tbl
->line
, r
->tbl
->pos
, NULL
);
774 * Parse a roff node's type from the input buffer. This must be in the
775 * form of ".foo xxx" in the usual way.
778 roff_parse(struct roff
*r
, const char *buf
, int *pos
)
784 if ('\0' == buf
[*pos
] || '"' == buf
[*pos
] ||
785 '\t' == buf
[*pos
] || ' ' == buf
[*pos
])
788 /* We stop the macro parse at an escape, tab, space, or nil. */
791 maclen
= strcspn(mac
, " \\\t\0");
793 t
= (r
->current_string
= roff_getstrn(r
, mac
, maclen
))
794 ? ROFF_USERDEF
: roffhash_find(mac
, maclen
);
798 while (buf
[*pos
] && ' ' == buf
[*pos
])
806 roff_cblock(ROFF_ARGS
)
810 * A block-close `..' should only be invoked as a child of an
811 * ignore macro, otherwise raise a warning and just ignore it.
814 if (NULL
== r
->last
) {
815 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
819 switch (r
->last
->tok
) {
827 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
834 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
839 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
842 roffnode_cleanscope(r
);
849 roffnode_cleanscope(struct roff
*r
)
853 if (--r
->last
->endspan
!= 0)
861 roff_ccond(struct roff
*r
, int ln
, int ppos
)
864 if (NULL
== r
->last
) {
865 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
869 switch (r
->last
->tok
) {
877 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
881 if (r
->last
->endspan
> -1) {
882 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
887 roffnode_cleanscope(r
);
894 roff_block(ROFF_ARGS
)
902 if (ROFF_ig
!= tok
) {
903 if ('\0' == (*bufp
)[pos
]) {
904 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
909 * Re-write `de1', since we don't really care about
910 * groff's strange compatibility mode, into `de'.
918 mandoc_msg(MANDOCERR_REQUEST
, r
->parse
, ln
, ppos
,
921 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
924 while (isspace((unsigned char)(*bufp
)[pos
]))
925 (*bufp
)[pos
++] = '\0';
928 roffnode_push(r
, tok
, name
, ln
, ppos
);
931 * At the beginning of a `de' macro, clear the existing string
932 * with the same name, if there is one. New content will be
933 * appended from roff_block_text() in multiline mode.
937 roff_setstr(r
, name
, "", 0);
939 if ('\0' == (*bufp
)[pos
])
942 /* If present, process the custom end-of-line marker. */
945 while ((*bufp
)[pos
] && ! isspace((unsigned char)(*bufp
)[pos
]))
949 * Note: groff does NOT like escape characters in the input.
950 * Instead of detecting this, we're just going to let it fly and
955 sz
= (size_t)(pos
- sv
);
957 if (1 == sz
&& '.' == (*bufp
)[sv
])
960 r
->last
->end
= mandoc_malloc(sz
+ 1);
962 memcpy(r
->last
->end
, *bufp
+ sv
, sz
);
963 r
->last
->end
[(int)sz
] = '\0';
966 mandoc_msg(MANDOCERR_ARGSLOST
, r
->parse
, ln
, pos
, NULL
);
974 roff_block_sub(ROFF_ARGS
)
980 * First check whether a custom macro exists at this level. If
981 * it does, then check against it. This is one of groff's
982 * stranger behaviours. If we encountered a custom end-scope
983 * tag and that tag also happens to be a "real" macro, then we
984 * need to try interpreting it again as a real macro. If it's
985 * not, then return ignore. Else continue.
989 for (i
= pos
, j
= 0; r
->last
->end
[j
]; j
++, i
++)
990 if ((*bufp
)[i
] != r
->last
->end
[j
])
993 if ('\0' == r
->last
->end
[j
] &&
994 ('\0' == (*bufp
)[i
] ||
996 '\t' == (*bufp
)[i
])) {
998 roffnode_cleanscope(r
);
1000 while (' ' == (*bufp
)[i
] || '\t' == (*bufp
)[i
])
1004 if (ROFF_MAX
!= roff_parse(r
, *bufp
, &pos
))
1011 * If we have no custom end-query or lookup failed, then try
1012 * pulling it out of the hashtable.
1015 t
= roff_parse(r
, *bufp
, &pos
);
1018 * Macros other than block-end are only significant
1019 * in `de' blocks; elsewhere, simply throw them away.
1021 if (ROFF_cblock
!= t
) {
1023 roff_setstr(r
, r
->last
->name
, *bufp
+ ppos
, 2);
1027 assert(roffs
[t
].proc
);
1028 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1029 ln
, ppos
, pos
, offs
));
1035 roff_block_text(ROFF_ARGS
)
1039 roff_setstr(r
, r
->last
->name
, *bufp
+ pos
, 2);
1047 roff_cond_sub(ROFF_ARGS
)
1054 roffnode_cleanscope(r
);
1055 t
= roff_parse(r
, *bufp
, &pos
);
1058 * Fully handle known macros when they are structurally
1059 * required or when the conditional evaluated to true.
1062 if ((ROFF_MAX
!= t
) &&
1063 (rr
|| ROFFMAC_STRUCT
& roffs
[t
].flags
)) {
1064 assert(roffs
[t
].proc
);
1065 return((*roffs
[t
].proc
)(r
, t
, bufp
, szp
,
1066 ln
, ppos
, pos
, offs
));
1070 * If `\}' occurs on a macro line without a preceding macro,
1071 * drop the line completely.
1075 if ('\\' == ep
[0] && '}' == ep
[1])
1078 /* Always check for the closing delimiter `\}'. */
1080 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1081 if ('}' == *(++ep
)) {
1083 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1087 return(rr
? ROFF_CONT
: ROFF_IGN
);
1092 roff_cond_text(ROFF_ARGS
)
1098 roffnode_cleanscope(r
);
1101 while (NULL
!= (ep
= strchr(ep
, '\\'))) {
1102 if ('}' == *(++ep
)) {
1104 roff_ccond(r
, ln
, ep
- *bufp
- 1);
1108 return(rr
? ROFF_CONT
: ROFF_IGN
);
1112 roff_getnum(const char *v
, int *pos
, int *res
)
1121 for (*res
= 0; isdigit((unsigned char)v
[p
]); p
++)
1122 *res
+= 10 * *res
+ v
[p
] - '0';
1134 roff_getop(const char *v
, int *pos
, char *res
)
1139 e
= v
[*pos
+ 1] == '=';
1162 * Evaluate a string comparison condition.
1163 * The first character is the delimiter.
1164 * Succeed if the string up to its second occurrence
1165 * matches the string up to its third occurrence.
1166 * Advance the cursor after the third occurrence
1167 * or lacking that, to the end of the line.
1170 roff_evalstrcond(const char *v
, int *pos
)
1172 const char *s1
, *s2
, *s3
;
1176 s1
= v
+ *pos
; /* initial delimiter */
1177 s2
= s1
+ 1; /* for scanning the first string */
1178 s3
= strchr(s2
, *s1
); /* for scanning the second string */
1180 if (NULL
== s3
) /* found no middle delimiter */
1183 while ('\0' != *++s3
) {
1184 if (*s2
!= *s3
) { /* mismatch */
1185 s3
= strchr(s3
, *s1
);
1188 if (*s3
== *s1
) { /* found the final delimiter */
1197 s3
= strchr(s2
, '\0');
1205 roff_evalcond(const char *v
, int *pos
)
1207 int wanttrue
, lh
, rh
;
1210 if ('!' == v
[*pos
]) {
1237 if (!roff_getnum(v
, pos
, &lh
))
1238 return(roff_evalstrcond(v
, pos
) == wanttrue
);
1239 if (!roff_getop(v
, pos
, &op
))
1240 return((lh
> 0) == wanttrue
);
1241 if (!roff_getnum(v
, pos
, &rh
))
1246 return((lh
>= rh
) == wanttrue
);
1248 return((lh
<= rh
) == wanttrue
);
1250 return((lh
== rh
) == wanttrue
);
1252 return((lh
> rh
) == wanttrue
);
1254 return((lh
< rh
) == wanttrue
);
1262 roff_line_ignore(ROFF_ARGS
)
1270 roff_cond(ROFF_ARGS
)
1273 roffnode_push(r
, tok
, NULL
, ln
, ppos
);
1276 * An `.el' has no conditional body: it will consume the value
1277 * of the current rstack entry set in prior `ie' calls or
1280 * If we're not an `el', however, then evaluate the conditional.
1283 r
->last
->rule
= ROFF_el
== tok
?
1284 (r
->rstackpos
< 0 ? 0 : r
->rstack
[r
->rstackpos
--]) :
1285 roff_evalcond(*bufp
, &pos
);
1288 * An if-else will put the NEGATION of the current evaluated
1289 * conditional into the stack of rules.
1292 if (ROFF_ie
== tok
) {
1293 if (r
->rstackpos
== RSTACK_MAX
- 1) {
1294 mandoc_msg(MANDOCERR_MEM
,
1295 r
->parse
, ln
, ppos
, NULL
);
1298 r
->rstack
[++r
->rstackpos
] = !r
->last
->rule
;
1301 /* If the parent has false as its rule, then so do we. */
1303 if (r
->last
->parent
&& !r
->last
->parent
->rule
)
1308 * If there is nothing on the line after the conditional,
1309 * not even whitespace, use next-line scope.
1312 if ('\0' == (*bufp
)[pos
]) {
1313 r
->last
->endspan
= 2;
1317 while (' ' == (*bufp
)[pos
])
1320 /* An opening brace requests multiline scope. */
1322 if ('\\' == (*bufp
)[pos
] && '{' == (*bufp
)[pos
+ 1]) {
1323 r
->last
->endspan
= -1;
1329 * Anything else following the conditional causes
1330 * single-line scope. Warn if the scope contains
1331 * nothing but trailing whitespace.
1334 if ('\0' == (*bufp
)[pos
])
1335 mandoc_msg(MANDOCERR_NOARGS
, r
->parse
, ln
, ppos
, NULL
);
1337 r
->last
->endspan
= 1;
1349 char *name
, *string
;
1352 * A symbol is named by the first word following the macro
1353 * invocation up to a space. Its value is anything after the
1354 * name's trailing whitespace and optional double-quote. Thus,
1358 * will have `bar " ' as its value.
1361 string
= *bufp
+ pos
;
1362 name
= roff_getname(r
, &string
, ln
, pos
);
1366 /* Read past initial double-quote. */
1370 /* The rest is the value. */
1371 roff_setstr(r
, name
, string
, ROFF_as
== tok
);
1376 roff_setreg(struct roff
*r
, const char *name
, int val
, char sign
)
1378 struct roffreg
*reg
;
1380 /* Search for an existing register with the same name. */
1383 while (reg
&& strcmp(name
, reg
->key
.p
))
1387 /* Create a new register. */
1388 reg
= mandoc_malloc(sizeof(struct roffreg
));
1389 reg
->key
.p
= mandoc_strdup(name
);
1390 reg
->key
.sz
= strlen(name
);
1392 reg
->next
= r
->regtab
;
1398 else if ('-' == sign
)
1405 * Handle some predefined read-only number registers.
1406 * For now, return -1 if the requested register is not predefined;
1407 * in case a predefined read-only register having the value -1
1408 * were to turn up, another special value would have to be chosen.
1411 roff_getregro(const char *name
)
1415 case ('A'): /* ASCII approximation mode is always off. */
1417 case ('g'): /* Groff compatibility mode is always on. */
1419 case ('H'): /* Fixed horizontal resolution. */
1421 case ('j'): /* Always adjust left margin only. */
1423 case ('T'): /* Some output device is always defined. */
1425 case ('V'): /* Fixed vertical resolution. */
1433 roff_getreg(const struct roff
*r
, const char *name
)
1435 struct roffreg
*reg
;
1438 if ('.' == name
[0] && '\0' != name
[1] && '\0' == name
[2]) {
1439 val
= roff_getregro(name
+ 1);
1444 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1445 if (0 == strcmp(name
, reg
->key
.p
))
1452 roff_getregn(const struct roff
*r
, const char *name
, size_t len
)
1454 struct roffreg
*reg
;
1457 if ('.' == name
[0] && 2 == len
) {
1458 val
= roff_getregro(name
+ 1);
1463 for (reg
= r
->regtab
; reg
; reg
= reg
->next
)
1464 if (len
== reg
->key
.sz
&&
1465 0 == strncmp(name
, reg
->key
.p
, len
))
1472 roff_freereg(struct roffreg
*reg
)
1474 struct roffreg
*old_reg
;
1476 while (NULL
!= reg
) {
1495 key
= roff_getname(r
, &val
, ln
, pos
);
1498 if ('+' == sign
|| '-' == sign
)
1501 sz
= strspn(val
, "0123456789");
1502 iv
= sz
? mandoc_strntoi(val
, sz
, 10) : 0;
1504 roff_setreg(r
, key
, iv
, sign
);
1517 while ('\0' != *cp
) {
1518 name
= roff_getname(r
, &cp
, ln
, (int)(cp
- *bufp
));
1520 roff_setstr(r
, name
, NULL
, 0);
1533 /* Parse the number of lines. */
1535 len
= strcspn(cp
, " \t");
1537 if ((iv
= mandoc_strntoi(cp
, len
, 10)) <= 0) {
1538 mandoc_msg(MANDOCERR_NUMERIC
, r
->parse
,
1539 ln
, ppos
, *bufp
+ 1);
1544 /* Arm the input line trap. */
1546 roffit_macro
= mandoc_strdup(cp
);
1554 const char *const *cp
;
1556 if (0 == ((MPARSE_MDOC
| MPARSE_QUICK
) & r
->options
))
1557 for (cp
= __mdoc_reserved
; *cp
; cp
++)
1558 roff_setstr(r
, *cp
, NULL
, 0);
1567 const char *const *cp
;
1569 if (0 == (MPARSE_QUICK
& r
->options
))
1570 for (cp
= __man_reserved
; *cp
; cp
++)
1571 roff_setstr(r
, *cp
, NULL
, 0);
1582 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1595 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1597 tbl_restart(ppos
, ln
, r
->tbl
);
1604 roff_closeeqn(struct roff
*r
)
1607 return(r
->eqn
&& ROFF_EQN
== eqn_end(&r
->eqn
) ? 1 : 0);
1612 roff_openeqn(struct roff
*r
, const char *name
, int line
,
1613 int offs
, const char *buf
)
1618 assert(NULL
== r
->eqn
);
1619 e
= eqn_alloc(name
, offs
, line
, r
->parse
);
1622 r
->last_eqn
->next
= e
;
1624 r
->first_eqn
= r
->last_eqn
= e
;
1626 r
->eqn
= r
->last_eqn
= e
;
1630 eqn_read(&r
->eqn
, line
, buf
, offs
, &poff
);
1639 roff_openeqn(r
, *bufp
+ pos
, ln
, ppos
, NULL
);
1648 mandoc_msg(MANDOCERR_NOSCOPE
, r
->parse
, ln
, ppos
, NULL
);
1656 struct tbl_node
*tbl
;
1659 mandoc_msg(MANDOCERR_SCOPEBROKEN
, r
->parse
, ln
, ppos
, NULL
);
1663 tbl
= tbl_alloc(ppos
, ln
, r
->parse
);
1666 r
->last_tbl
->next
= tbl
;
1668 r
->first_tbl
= r
->last_tbl
= tbl
;
1670 r
->tbl
= r
->last_tbl
= tbl
;
1682 if ('\0' == *p
|| '.' == (r
->control
= *p
++))
1686 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1695 const char *p
, *first
, *second
;
1697 enum mandoc_esc esc
;
1702 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
, ln
, ppos
, NULL
);
1706 while ('\0' != *p
) {
1710 if ('\\' == *first
) {
1711 esc
= mandoc_escape(&p
, NULL
, NULL
);
1712 if (ESCAPE_ERROR
== esc
) {
1714 (MANDOCERR_BADESCAPE
, r
->parse
,
1715 ln
, (int)(p
- *bufp
), NULL
);
1718 fsz
= (size_t)(p
- first
);
1722 if ('\\' == *second
) {
1723 esc
= mandoc_escape(&p
, NULL
, NULL
);
1724 if (ESCAPE_ERROR
== esc
) {
1726 (MANDOCERR_BADESCAPE
, r
->parse
,
1727 ln
, (int)(p
- *bufp
), NULL
);
1730 ssz
= (size_t)(p
- second
);
1731 } else if ('\0' == *second
) {
1732 mandoc_msg(MANDOCERR_ARGCOUNT
, r
->parse
,
1733 ln
, (int)(p
- *bufp
), NULL
);
1739 roff_setstrn(&r
->xmbtab
, first
,
1740 fsz
, second
, ssz
, 0);
1744 if (NULL
== r
->xtab
)
1745 r
->xtab
= mandoc_calloc
1746 (128, sizeof(struct roffstr
));
1748 free(r
->xtab
[(int)*first
].p
);
1749 r
->xtab
[(int)*first
].p
= mandoc_strndup(second
, ssz
);
1750 r
->xtab
[(int)*first
].sz
= ssz
;
1762 mandoc_msg(MANDOCERR_SO
, r
->parse
, ln
, ppos
, NULL
);
1765 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1766 * opening anything that's not in our cwd or anything beneath
1767 * it. Thus, explicitly disallow traversing up the file-system
1768 * or using absolute paths.
1772 if ('/' == *name
|| strstr(name
, "../") || strstr(name
, "/..")) {
1773 mandoc_msg(MANDOCERR_SOPATH
, r
->parse
, ln
, pos
, NULL
);
1783 roff_userdef(ROFF_ARGS
)
1790 * Collect pointers to macro argument strings
1791 * and NUL-terminate them.
1794 for (i
= 0; i
< 9; i
++)
1795 arg
[i
] = '\0' == *cp
? "" :
1796 mandoc_getarg(r
->parse
, &cp
, ln
, &pos
);
1799 * Expand macro arguments.
1802 n1
= cp
= mandoc_strdup(r
->current_string
);
1803 while (NULL
!= (cp
= strstr(cp
, "\\$"))) {
1805 if (0 > i
|| 8 < i
) {
1806 /* Not an argument invocation. */
1811 *szp
= strlen(n1
) - 3 + strlen(arg
[i
]) + 1;
1812 n2
= mandoc_malloc(*szp
);
1814 strlcpy(n2
, n1
, (size_t)(cp
- n1
+ 1));
1815 strlcat(n2
, arg
[i
], *szp
);
1816 strlcat(n2
, cp
+ 3, *szp
);
1818 cp
= n2
+ (cp
- n1
);
1824 * Replace the macro invocation
1825 * by the expanded macro.
1830 *szp
= strlen(*bufp
) + 1;
1832 return(*szp
> 1 && '\n' == (*bufp
)[(int)*szp
- 2] ?
1833 ROFF_REPARSE
: ROFF_APPEND
);
1837 roff_getname(struct roff
*r
, char **cpp
, int ln
, int pos
)
1845 /* Read until end of name. */
1846 for (cp
= name
; '\0' != *cp
&& ' ' != *cp
; cp
++) {
1852 mandoc_msg(MANDOCERR_NAMESC
, r
->parse
, ln
, pos
, NULL
);
1857 /* Nil-terminate name. */
1861 /* Read past spaces. */
1870 * Store *string into the user-defined string called *name.
1871 * To clear an existing entry, call with (*r, *name, NULL, 0).
1872 * append == 0: replace mode
1873 * append == 1: single-line append mode
1874 * append == 2: multiline append mode, append '\n' after each call
1877 roff_setstr(struct roff
*r
, const char *name
, const char *string
,
1881 roff_setstrn(&r
->strtab
, name
, strlen(name
), string
,
1882 string
? strlen(string
) : 0, append
);
1886 roff_setstrn(struct roffkv
**r
, const char *name
, size_t namesz
,
1887 const char *string
, size_t stringsz
, int append
)
1892 size_t oldch
, newch
;
1894 /* Search for an existing string with the same name. */
1897 while (n
&& strcmp(name
, n
->key
.p
))
1901 /* Create a new string table entry. */
1902 n
= mandoc_malloc(sizeof(struct roffkv
));
1903 n
->key
.p
= mandoc_strndup(name
, namesz
);
1909 } else if (0 == append
) {
1919 * One additional byte for the '\n' in multiline mode,
1920 * and one for the terminating '\0'.
1922 newch
= stringsz
+ (1 < append
? 2u : 1u);
1924 if (NULL
== n
->val
.p
) {
1925 n
->val
.p
= mandoc_malloc(newch
);
1930 n
->val
.p
= mandoc_realloc(n
->val
.p
, oldch
+ newch
);
1933 /* Skip existing content in the destination buffer. */
1934 c
= n
->val
.p
+ (int)oldch
;
1936 /* Append new content to the destination buffer. */
1938 while (i
< (int)stringsz
) {
1940 * Rudimentary roff copy mode:
1941 * Handle escaped backslashes.
1943 if ('\\' == string
[i
] && '\\' == string
[i
+ 1])
1948 /* Append terminating bytes. */
1953 n
->val
.sz
= (int)(c
- n
->val
.p
);
1957 roff_getstrn(const struct roff
*r
, const char *name
, size_t len
)
1959 const struct roffkv
*n
;
1962 for (n
= r
->strtab
; n
; n
= n
->next
)
1963 if (0 == strncmp(name
, n
->key
.p
, len
) &&
1964 '\0' == n
->key
.p
[(int)len
])
1967 for (i
= 0; i
< PREDEFS_MAX
; i
++)
1968 if (0 == strncmp(name
, predefs
[i
].name
, len
) &&
1969 '\0' == predefs
[i
].name
[(int)len
])
1970 return(predefs
[i
].str
);
1976 roff_freestr(struct roffkv
*r
)
1978 struct roffkv
*n
, *nn
;
1980 for (n
= r
; n
; n
= nn
) {
1988 const struct tbl_span
*
1989 roff_span(const struct roff
*r
)
1992 return(r
->tbl
? tbl_span(r
->tbl
) : NULL
);
1996 roff_eqn(const struct roff
*r
)
1999 return(r
->last_eqn
? &r
->last_eqn
->eqn
: NULL
);
2003 * Duplicate an input string, making the appropriate character
2004 * conversions (as stipulated by `tr') along the way.
2005 * Returns a heap-allocated string with all the replacements made.
2008 roff_strdup(const struct roff
*r
, const char *p
)
2010 const struct roffkv
*cp
;
2014 enum mandoc_esc esc
;
2016 if (NULL
== r
->xmbtab
&& NULL
== r
->xtab
)
2017 return(mandoc_strdup(p
));
2018 else if ('\0' == *p
)
2019 return(mandoc_strdup(""));
2022 * Step through each character looking for term matches
2023 * (remember that a `tr' can be invoked with an escape, which is
2024 * a glyph but the escape is multi-character).
2025 * We only do this if the character hash has been initialised
2026 * and the string is >0 length.
2032 while ('\0' != *p
) {
2033 if ('\\' != *p
&& r
->xtab
&& r
->xtab
[(int)*p
].p
) {
2034 sz
= r
->xtab
[(int)*p
].sz
;
2035 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2036 memcpy(res
+ ssz
, r
->xtab
[(int)*p
].p
, sz
);
2040 } else if ('\\' != *p
) {
2041 res
= mandoc_realloc(res
, ssz
+ 2);
2046 /* Search for term matches. */
2047 for (cp
= r
->xmbtab
; cp
; cp
= cp
->next
)
2048 if (0 == strncmp(p
, cp
->key
.p
, cp
->key
.sz
))
2053 * A match has been found.
2054 * Append the match to the array and move
2055 * forward by its keysize.
2057 res
= mandoc_realloc
2058 (res
, ssz
+ cp
->val
.sz
+ 1);
2059 memcpy(res
+ ssz
, cp
->val
.p
, cp
->val
.sz
);
2061 p
+= (int)cp
->key
.sz
;
2066 * Handle escapes carefully: we need to copy
2067 * over just the escape itself, or else we might
2068 * do replacements within the escape itself.
2069 * Make sure to pass along the bogus string.
2072 esc
= mandoc_escape(&p
, NULL
, NULL
);
2073 if (ESCAPE_ERROR
== esc
) {
2075 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2076 memcpy(res
+ ssz
, pp
, sz
);
2080 * We bail out on bad escapes.
2081 * No need to warn: we already did so when
2082 * roff_res() was called.
2085 res
= mandoc_realloc(res
, ssz
+ sz
+ 1);
2086 memcpy(res
+ ssz
, pp
, sz
);
2090 res
[(int)ssz
] = '\0';
2095 * Find out whether a line is a macro line or not.
2096 * If it is, adjust the current position and return one; if it isn't,
2097 * return zero and don't change the current position.
2098 * If the control character has been set with `.cc', then let that take precedence.
2100 * This is slightly contrary to groff, where using the non-breaking
2101 * control character when `cc' has been invoked will cause the
2102 * non-breaking macro contents to be printed verbatim.
2105 roff_getcontrol(const struct roff
*r
, const char *cp
, int *ppos
)
2111 if (0 != r
->control
&& cp
[pos
] == r
->control
)
2113 else if (0 != r
->control
)
2115 else if ('\\' == cp
[pos
] && '.' == cp
[pos
+ 1])
2117 else if ('.' == cp
[pos
] || '\'' == cp
[pos
])
2122 while (' ' == cp
[pos
] || '\t' == cp
[pos
])